library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library("readxl")
library(tidyverse)
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\YONI_ITS_analyses\\R_ITS_yoni\\Analyses_final\\RE_ANNOTATION_2024')
otu <- read.table("ITS_OTUs_mod.txt", sep="\t", header=FALSE)
tax <- read.table("ITS_OTUs.UNITEv10_sh_99.wang.taxonomy", sep="\t", header=FALSE)
More (4) observations (OTUS) in tax? check what is the deal. Get OTU names
dif <- setdiff(tax$V1, otu$V1)
dif
## [1] "OTU57653" "OTU58305"
These OTUs are missing from OTU table but are present in tax table? Let’s remove them form tax table Also, lets tweek the table row names and columns
# modify otu table
colnames(otu)=otu[c(1),]
# erase the first row, as now it is doubled
otu=otu[-c(1),]
# make first column into rownames
rownames(otu) <- otu$`OTU ID`
otu <- otu[, -c(1)]
# let's make a copy of tax table
tax.orig <- tax
# change column names
colnames(tax)[1] <- "OTU"
colnames(tax)[2] <- "taxa"
# and modify the tax, where in the str_remove_all . means any single character
tax <- tax %>%
mutate(taxa = str_remove_all(taxa, ".__")) %>%
separate(taxa,
into = c("kingdom", "phylum", "class", "order", "family", "genus", "species"),
sep = ";")
Everything went well but I got a warning message, because there are an extra ; at the end of the line so the last column is not made. which is correct. Let’s check if we now have NAs, just to check everything is ok.
sum(is.na(tax$OTU))
## [1] 0
sum(is.na(tax$kingdom))
## [1] 0
sum(is.na(tax$phylum))
## [1] 0
sum(is.na(tax$class))
## [1] 0
sum(is.na(tax$order))
## [1] 0
sum(is.na(tax$family))
## [1] 0
sum(is.na(tax$genus))
## [1] 0
sum(is.na(tax$species))
## [1] 0
Let’s remove the parantheses and numbers
tax$kingdom <- sub("\\(.*", "", tax$kingdom)
tax$phylum <- sub("\\(.*", "", tax$phylum)
tax$class <- sub("\\(.*", "", tax$class)
tax$order <- sub("\\(.*", "", tax$order)
tax$family <- sub("\\(.*", "", tax$family)
tax$genus <- sub("\\(.*", "", tax$genus)
tax$species <- sub("\\(.*", "", tax$species)
Check unique values of the higher taxons
unique(tax$kingdom)
## [1] "Fungi"
unique(tax$phylum)
## [1] "Ascomycota" "Basidiomycota" "Fungi_unclassified"
## [4] "Rozellomycota" "Basidiobolomycota" "Chytridiomycota"
## [7] "Mortierellomycota" "unclassified" "Glomeromycota"
## [10] "Mucoromycota" "Zoopagomycota" "Monoblepharomycota"
## [13] "Olpidiomycota" "Kickxellomycota" "Entorrhizomycota"
## [16] "Aphelidiomycota" "Neocallimastigomycota"
“OTU57653” “OTU58305”
tax <- subset(tax, OTU!="OTU57653")
tax <- subset(tax, OTU!="OTU58305")
samples <- read.csv2('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\YONI_ITS_analyses\\R_ITS_yoni\\Analyses_final\\RE_ANNOTATION_2024\\sample_data_updated_root_biomass.csv')
# change some column names
colnames(samples)[which(names(samples) == "production_type")] <- "sample_type"
x <- otu
x <- t(x)
x <- as.data.frame(x)
x <- rownames_to_column(x)
y <- samples[, c(1,2)]
x <- left_join(y, x, by = c("ID"="rowname"))
x <- x[, -c(1)]
rownames(x) <- x$sampleID
x <- x[, -c(1)]
x <- t(x)
otu <- x
rm(x)
rm(y)
library(dplyr)
otu <- as.data.frame(otu)
otu <- otu %>% mutate_if(is.character, as.numeric)
rownames(tax) <- tax$OTU
tax <- tax[, -c(1)]
rownames(samples) <- samples$sampleID
samples <- samples[, -c(1)]
samples <- sample_data(samples)
otu <- as.matrix(otu)
otu <- otu_table(otu, taxa_are_rows = TRUE)
tax <- tax_table(as.matrix(tax))
ps <- phyloseq(otu, tax, samples)
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 70198 taxa and 142 samples ]
## sample_data() Sample Data: [ 142 samples by 22 sample variables ]
## tax_table() Taxonomy Table: [ 70198 taxa by 7 taxonomic ranks ]
Fungi_unclassified unclassified
ps <- subset_samples(ps, sampleID != "0ctrl-1")
ps <- subset_samples(ps, sampleID != "0ctrl-2")
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 70198 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 22 sample variables ]
## tax_table() Taxonomy Table: [ 70198 taxa by 7 taxonomic ranks ]
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps),
MARGIN = ifelse(taxa_are_rows(ps), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps = prune_taxa((prev0 > 0), ps)
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 70198 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 22 sample variables ]
## tax_table() Taxonomy Table: [ 70198 taxa by 7 taxonomic ranks ]
rm(prev0)
rm(tax.orig)
#I will remove two: "Fungi_unclassified" and "unclassified"
ps <- subset_taxa(ps, phylum != "Fungi_unclassified" & phylum != "unclassified")
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 68186 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 22 sample variables ]
## tax_table() Taxonomy Table: [ 68186 taxa by 7 taxonomic ranks ]
ps_sng <- ps
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
save(ps_sng, file='ps_phyloseq_with_sng')
ps <- filter_taxa(ps, function (x) {sum(x > 0) > 1}, prune=TRUE)
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
save(ps, file='ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 22 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
summarize_phyloseq(ps)
## [[1]]
## [1] "1] Min. number of reads = 44"
##
## [[2]]
## [1] "2] Max. number of reads = 410851"
##
## [[3]]
## [1] "3] Total number of reads = 11539503"
##
## [[4]]
## [1] "4] Average number of reads = 82425.0214285714"
##
## [[5]]
## [1] "5] Median number of reads = 79653.5"
##
## [[6]]
## [1] "7] Sparsity = 0.928178415470992"
##
## [[7]]
## [1] "6] Any OTU sum to 1 or less? NO"
##
## [[8]]
## [1] "8] Number of singletons = 0"
##
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n (i.e. exactly one read detected across all samples)0"
##
## [[10]]
## [1] "10] Number of sample variables are: 22"
##
## [[11]]
## [1] "sampleID" "plot" "sampling_position"
## [4] "actual_sample_depth" "depth" "depth_numerical"
## [7] "vegetation" "sample_type" "root_mgg"
## [10] "pH_H2O" "EC_uScm" "C_g_per_kg"
## [13] "N_gkg" "TP_gkg" "Alox_mmolkg"
## [16] "Feox_mmolkg" "oxides_mmolkg" "PH2O_mgkg"
## [19] "Porg_mgkg" "DOC_mgkg" "Pinorg_mgkg"
## [22] "C_per_N"
readcount(ps)
## CG9.1_0to10 CG9.1_10to20 CG9.1_20to30 CG9.1_30to40 CG9.1_40to70
## 86263 86079 104971 70687 44
## CG9.2_0to10 CG9.2_10to20 CG9.2_20to30 CG9.2_30to40 CG9.2_40to80
## 93325 99797 71502 33354 12765
## CG9.3_0to10 CG9.3_10to20 CG9.3_20to30 CG9.3_30to40 CG9.3_40to70
## 80231 84314 95845 119875 8588
## CPO5.1_0to10 CPO5.1_10to20 CPO5.1_20to30 CPO5.1_30to40 CPO5.1_40to70
## 98378 70183 84869 104719 18979
## CPO5.2_0to10 CPO5.2_10to20 CPO5.2_20to30 CPO5.2_30to40 CPO5.2_40to70
## 71870 95165 58356 13410 7818
## CPO5.3_0to10 CPO5.3_10to20 CPO5.3_20to30 CPO5.3_30to40 CPO5.3_40to80
## 77865 69685 69812 100730 29285
## CR14.1_0to10 CR14.1_10to20 CR14.1_20to30 CR14.1_30to40 CR14.1_40to80
## 87835 83770 45199 191427 10410
## CR14.2_0to10 CR14.2_10to20 CR14.2_20to30 CR14.2_30to40 CR14.2_40to70
## 79191 80650 168434 108506 35667
## CR14.3_0to10 CR14.3_10to20 CR14.3_20to30 CR14.3_30to40 CR14.3_40to60
## 85581 66271 90307 206728 27639
## M1_0to10 M1_10to20 M1_20to30 M1_30to40 M1_40to60
## 20487 79126 80116 65933 9920
## M2_0to10 M2_10to20 M2_20to30 M2_30to40 M2_40to60
## 104802 92361 68349 48693 13643
## M3_0to10 M3_10to20 M3_20to30 M3_30to40 M3_40to60
## 89502 88371 96903 49946 65688
## NG2A1_0to10 NG2A1_10to20 NG2A1_20to30 NG2A1_30to40 NG2A1_40to70
## 63024 150089 215010 410851 21745
## NG2A2_0to10 NG2A2_10to20 NG2A2_20to30 NG2A2_30to40 NG2A2_40to70
## 122936 138014 150337 232098 28984
## NG2A3_0to10 NG2A3_10to20 NG2A3_20to30 NG2A3_30to40 NG2A3_40to70
## 70345 112821 151484 56781 48499
## NG2B1_0to10 NG2B1_10to20 NG2B1_20to30 NG2B1_30to40 NG2B1_40to70
## 71098 133953 156486 56105 45557
## NG2B2_0to10 NG2B2_10to20 NG2B2_20to30 NG2B2_30to40 NG2B2_40to70
## 65605 203928 161140 71344 811
## NG2B3_0to10 NG2B3_10to20 NG2B3_20to30 NG2B3_30to40 NG2B3_40to70
## 91494 103358 122182 526 18249
## NG3.2_0to10 NG3.2_10to20 NG3.2_20to30 NG3.2_30to40 NG3.2_40to70
## 30720 126078 74239 153319 28542
## NG3.3_0to10 NG3.3_10to20 NG3.3_20to30 NG3.3_30to40 NG3.3_40to60
## 15342 69130 56845 119149 72310
## OG10.1_0to10 OG10.1_10to20 OG10.1_20to30 OG10.1_30to40 OG10.1_40to80
## 65025 73499 63754 23026 9882
## OG10.2_0to10 OG10.2_10to20 OG10.2_20to30 OG10.2_30to40 OG10.2_40to70
## 107655 77838 90829 97084 32336
## OG10.3_0to10 OG10.3_10to20 OG10.3_20to30 OG10.3_30to40 OG10.3_40to70
## 75755 80589 99232 67396 27950
## OPO6.2_0to10 OPO6.2_10to20 OPO6.2_20to30 OPO6.2_30to40 OPO6.2_40to70
## 98465 84444 63563 103414 86941
## OPO6.3_0to10 OPO6.3_10to20 OPO6.3_20to30 OPO6.3_30to40 OPO6.3_40to70
## 103969 85383 66316 166823 61540
## OR13.1_0to10 OR13.1_10to20 OR13.1_20to30 OR13.1_30to40 OR13.1_40to80
## 94567 107459 112756 92093 20227
## OR13.2_0to10 OR13.2_10to20 OR13.2_20to30 OR13.2_30to40 OR13.2_40to60
## 113845 22672 29060 12432 15481
## OR13.3_0to10 OR13.3_10to20 OR13.3_20to30 OR13.3_30to40 OR13.3_40to60
## 101629 99800 91229 97335 235463
I noticed that there is a mistake in the taxonomy, so that sometimes I have “unclassified” annotation at lets say genus level, although I have annotation at lets say family level
In these cases I rather have at the genus level the annotation “family_unclassified”
tax <- as.data.frame(tax_table(ps))
I have no unclassified at phylum level
change first at level:
class
tax[tax$class == "unclassified", "phylum"]
## [1] "Chytridiomycota" "Basidiomycota" "Chytridiomycota"
## [4] "Monoblepharomycota" "Rozellomycota" "Basidiomycota"
## [7] "Chytridiomycota" "Rozellomycota" "Chytridiomycota"
## [10] "Rozellomycota" "Rozellomycota" "Chytridiomycota"
## [13] "Rozellomycota" "Rozellomycota" "Chytridiomycota"
## [16] "Rozellomycota" "Rozellomycota" "Rozellomycota"
## [19] "Rozellomycota" "Rozellomycota" "Rozellomycota"
## [22] "Rozellomycota" "Rozellomycota" "Rozellomycota"
## [25] "Rozellomycota" "Rozellomycota" "Rozellomycota"
## [28] "Basidiomycota" "Basidiomycota" "Basidiomycota"
## [31] "Rozellomycota" "Rozellomycota" "Basidiomycota"
## [34] "Basidiomycota" "Kickxellomycota" "Rozellomycota"
## [37] "Basidiomycota" "Basidiomycota" "Rozellomycota"
## [40] "Basidiomycota" "Basidiomycota" "Rozellomycota"
## [43] "Chytridiomycota" "Rozellomycota" "Rozellomycota"
## [46] "Rozellomycota" "Rozellomycota" "Rozellomycota"
## [49] "Basidiomycota" "Rozellomycota" "Rozellomycota"
## [52] "Chytridiomycota" "Rozellomycota" "Rozellomycota"
## [55] "Chytridiomycota" "Chytridiomycota" "Chytridiomycota"
## [58] "Chytridiomycota" "Basidiomycota" "Rozellomycota"
## [61] "Rozellomycota" "Chytridiomycota" "Rozellomycota"
## [64] "Rozellomycota" "Rozellomycota" "Basidiomycota"
## [67] "Basidiomycota" "Rozellomycota" "Rozellomycota"
## [70] "Rozellomycota" "Rozellomycota" "Rozellomycota"
## [73] "Rozellomycota" "Rozellomycota" "Rozellomycota"
## [76] "Rozellomycota" "Rozellomycota" "Rozellomycota"
## [79] "Rozellomycota" "Rozellomycota" "Chytridiomycota"
## [82] "Rozellomycota" "Rozellomycota" "Rozellomycota"
## [85] "Rozellomycota" "Rozellomycota" "Rozellomycota"
## [88] "Chytridiomycota" "Rozellomycota" "Rozellomycota"
## [91] "Basidiomycota" "Rozellomycota" "Rozellomycota"
## [94] "Rozellomycota" "Rozellomycota" "Basidiomycota"
## [97] "Rozellomycota" "Rozellomycota" "Rozellomycota"
## [100] "Chytridiomycota" "Rozellomycota" "Rozellomycota"
## [103] "Basidiomycota" "Chytridiomycota" "Rozellomycota"
## [106] "Basidiomycota" "Glomeromycota" "Rozellomycota"
## [109] "Rozellomycota" "Rozellomycota" "Rozellomycota"
## [112] "Rozellomycota" "Rozellomycota" "Rozellomycota"
## [115] "Rozellomycota" "Rozellomycota" "Chytridiomycota"
## [118] "Basidiomycota" "Rozellomycota" "Olpidiomycota"
## [121] "Chytridiomycota" "Rozellomycota" "Monoblepharomycota"
tax$class <- ifelse(tax$class == "unclassified", paste(tax$phylum, sep = "_", "unclassified"), as.character(tax$class))
And same for all rest ranks, but I have to replace the “_unclassified_unclassified” with “_unclassified” afterwards
order
tax$order <- ifelse(tax$order == "unclassified", paste(tax$class, sep = "_", "unclassified"), as.character(tax$order))
tax[] <- lapply(tax, gsub, pattern = "unclassified_unclassified", replacement = "unclassified", fixed = TRUE)
family
tax$family <- ifelse(tax$family == "unclassified", paste(tax$order, sep = "_", "unclassified"), as.character(tax$family))
tax[] <- lapply(tax, gsub, pattern = "unclassified_unclassified", replacement = "unclassified", fixed = TRUE)
genus
tax$genus <- ifelse(tax$genus == "unclassified", paste(tax$family, sep = "_", "unclassified"), as.character(tax$genus))
tax[] <- lapply(tax, gsub, pattern = "unclassified_unclassified", replacement = "unclassified", fixed = TRUE)
species
tax$species <- ifelse(tax$species == "unclassified", paste(tax$genus, sep = "_", "unclassified"), as.character(tax$species))
tax[] <- lapply(tax, gsub, pattern = "unclassified_unclassified", replacement = "unclassified", fixed = TRUE)
Save the new modified ps as the “final version”
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
tax_table(ps) <- tax_table(as.matrix(tax))
save(ps, file='ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 22 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
library("ggpubr")
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 22 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
meta <- meta(ps)
# Column indices to be converted numeric
i <- c(6, 9:22)
meta[, i] <- apply(meta[, i], 2, function(x) as.numeric(as.character(x)))
meta$depth_numerical[meta$depth_numerical == 0] <- 5
meta$depth_numerical[meta$depth_numerical == 10] <- 15
meta$depth_numerical[meta$depth_numerical == 20] <- 25
meta$depth_numerical[meta$depth_numerical == 30] <- 35
meta$depth_numerical[meta$depth_numerical == 40] <- 60
# change natural_grass to meadow
meta <- data.frame(lapply(meta, function(x) {gsub("Natural_grass", "meadow", x)}))
rownames(meta) <- meta$sampleID
sample_data(ps) <- sample_data(meta)
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
save(ps, file='ps_FINAL')
div <- microbiome::alpha(ps, index = "all")
# Assign the estimated diversity to sample metadata
sample_data(ps)$observed <- div$observed
sample_data(ps)$chao1 <- div$chao1
sample_data(ps)$shannon <- div$diversity_shannon
meta <- meta(ps)
meta$sample_type <- as.factor(meta$sample_type)
meta$sample_type <- factor(meta$sample_type, levels = c("forest", "meadow", "organic", "conventional"))
# Column indices to be converted numeric
i <- c(6, 9:25)
meta[, i] <- apply(meta[, i], 2, function(x) as.numeric(as.character(x)))
sample_data(ps) <- sample_data(meta)
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
save(ps, file='ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 25 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
I will use the one without singletons, but just for comparison, I also calculate with singletons
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
load('ps_phyloseq_with_sng')
div <- microbiome::alpha(ps_sng, index = "all")
# Assign the estimated diversity to sample metadata
sample_data(ps)$observed_sng <- div$observed
sample_data(ps)$chao1_sng <- div$chao1
sample_data(ps)$shannon_sng <- div$diversity_shannon
meta <- meta(ps)
save(ps, file='ps_FINAL')
# create your own color palette for sample types
MyPalette <- c(forest = "#1167b1", meadow = "#fbc02d", organic = "#8a8a8a", conventional = "#b71c1c")
# OTU richness
OTU_rich <- meta %>%
dplyr::group_by(sample_type, depth_numerical) %>%
dplyr::summarise(mean = mean(observed, na.rm = TRUE), se = (sd(observed, na.rm = TRUE)/sqrt(length((observed))))) %>%
dplyr::ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=3, position=position_dodge(1.2)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=16),
axis.title = element_text(size=16),
legend.text = element_text(size=18),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="OTU richness") +
labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
OTU_rich
# shannon
shannon <- meta %>%
dplyr::group_by(sample_type, depth_numerical) %>%
dplyr::summarise(mean = mean(shannon, na.rm = TRUE), se = (sd(shannon, na.rm = TRUE)/sqrt(length((shannon))))) %>%
dplyr::ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=3, position=position_dodge(1.2)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=16),
axis.title = element_text(size=16),
legend.text = element_text(size=18),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="Shannon") +
labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
shannon
# OTU richness
OTU_rich_sng <- meta %>%
dplyr::group_by(sample_type, depth_numerical) %>%
dplyr::summarise(mean = mean(observed_sng, na.rm = TRUE), se = (sd(observed_sng, na.rm = TRUE)/sqrt(length((observed_sng))))) %>%
dplyr::ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=3, position=position_dodge(1.2)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=16),
axis.title = element_text(size=16),
legend.text = element_text(size=18),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="OTU richness") +
labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
OTU_rich_sng
# shannon
shannon_sng <- meta %>%
dplyr::group_by(sample_type, depth_numerical) %>%
dplyr::summarise(mean = mean(shannon_sng, na.rm = TRUE), se = (sd(shannon_sng, na.rm = TRUE)/sqrt(length((shannon_sng))))) %>%
dplyr::ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=3, position=position_dodge(1.2)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=16),
axis.title = element_text(size=16),
legend.text = element_text(size=18),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="Shannon") +
labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
shannon
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library("RColorBrewer") # nice color options
library(multcompView)
library(rcompanion)
library(car)
library(multcomp)
library(stringr)
library(ggrepel)
library(MicEco)
library(metagMisc)
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
#FG <- parse_funguild()
#attr(FG, "DownloadDate") # Check when the database was downloaded
#setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
#save FG
#write.csv2(FG, file = "FUNGuild_31_05_2024.csv", row.names = FALSE)
I have previously uploaded the FUNGuild database and will use that version here for continuity. I used a version downloaded: “Fri May 31 19:45:41 2024”
I need to annotate separately at different taxonomic level
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\YONI_ITS_analyses\\R_ITS_yoni\\Analyses_final\\RE_ANNOTATION_2024')
FG <- read.csv2("FUNGuild_31_05_2024.csv")
# what different levels there are:
unique(FG$taxonomicLevel)
## [1] "Genus" "Species" "Variety" "Family" "Order"
## [6] "Phylum" "Form" "Subspecies"
I will annotate with Species, Genus, Family, Order, Phylum level
fg <- FG[FG$taxonomicLevel == "Species", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)
# fg_sp doesn't have underscores, so lets add them
fg$taxon <- sub(" ", "_", fg$taxon)
# merge tables
colnames(fg)[1] <- "species"
FG_tax_table <- merge(tax_table, fg, by = "species", all.x = TRUE)
# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA
# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[, c(8, 12, 13)]
# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")
# change column names
colnames(FG_tax_table)[1] <- "trophicMode_sp"
colnames(FG_tax_table)[2] <- "guild_sp"
# save with new name
FUNGuild_sp <- FG_tax_table
fg <- FG[FG$taxonomicLevel == "Genus", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)
# merge tables
colnames(fg)[1] <- "genus"
FG_tax_table <- merge(tax_table, fg, by = "genus", all.x = TRUE)
# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA
# only keep the OTU, TrophicMode and guild
# and OTU as row names
FG_tax_table <- FG_tax_table[, c(8, 12, 13)]
# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")
# change column names
colnames(FG_tax_table)[1] <- "trophicMode_gen"
colnames(FG_tax_table)[2] <- "guild_gen"
# save with new name
FUNGuild_gen <- FG_tax_table
fg <- FG[FG$taxonomicLevel == "Family", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)
# merge tables
colnames(fg)[1] <- "family"
FG_tax_table <- merge(tax_table, fg, by = "family", all.x = TRUE)
# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA
# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[, c(8, 12, 13)]
# and taxon as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")
# change column names
colnames(FG_tax_table)[1] <- "trophicMode_fam"
colnames(FG_tax_table)[2] <- "guild_fam"
# save with new name
FUNGuild_fam <- FG_tax_table
fg <- FG[FG$taxonomicLevel == "Order", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)
# merge tables
colnames(fg)[1] <- "order"
FG_tax_table <- merge(tax_table, fg, by = "order", all.x = TRUE)
# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA
# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[, c(8, 12, 13)]
# and taxon as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")
# change column names
colnames(FG_tax_table)[1] <- "trophicMode_ord"
colnames(FG_tax_table)[2] <- "guild_ord"
# save with new name
FUNGuild_ord <- FG_tax_table
fg <- FG[FG$taxonomicLevel == "Phylum", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)
# merge tables
colnames(fg)[1] <- "phylum"
FG_tax_table <- merge(tax_table, fg, by = "phylum", all.x = TRUE)
# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA
# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[, c(8, 12, 13)]
# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")
# change column names
colnames(FG_tax_table)[1] <- "trophicMode_phy"
colnames(FG_tax_table)[2] <- "guild_phy"
# save with new name
FUNGuild_phy <- FG_tax_table
species Genus Family Order Phylum
x <- left_join(rownames_to_column(FUNGuild_sp), rownames_to_column(FUNGuild_gen), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_fam), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_ord), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_phy), by = "rowname")
# get the value from another column if NA: now from genus
y <- x %>%
mutate(trophicMode_sp = coalesce(trophicMode_sp,trophicMode_gen))
# get the value from another column if NA: now from genus
y <- y %>%
mutate(guild_sp = coalesce(guild_sp,guild_gen))
# same for family
# get the value from another column if NA
y <- y %>%
mutate(trophicMode_sp = coalesce(trophicMode_sp,trophicMode_fam))
# get the value from another column if NA
y <- y %>%
mutate(guild_sp = coalesce(guild_sp,guild_fam))
# same for order
# get the value from another column if NA
y <- y %>%
mutate(trophicMode_sp = coalesce(trophicMode_sp,trophicMode_ord))
# get the value from another column if NA
y <- y %>%
mutate(guild_sp = coalesce(guild_sp,guild_ord))
# same for phylum
# get the value from another column if NA
y <- y %>%
mutate(trophicMode_sp = coalesce(trophicMode_sp,trophicMode_phy))
# get the value from another column if NA
y <- y %>%
mutate(guild_sp = coalesce(guild_sp,guild_phy))
# then rename the sp columns
colnames(y)[2] <- "trophicMode"
colnames(y)[3] <- "guild"
# remove the rest of the columns
y <- y[, -c(4:11)]
# rownames
y2 <- y[,-1]
rownames(y2) <- y[,1]
# how many of different trophic modes and NAs?
table(y2$trophicMode, useNA = "ifany")
##
## Pathotroph Pathotroph-Pathotroph-Saprotroph
## 20 7
## Pathotroph-Saprotroph-Symbiotroph Saprotroph
## 7 37
## Symbiotroph Pathotroph
## 148 1480
## Pathotroph-Saprotroph Pathotroph-Saprotroph-Symbiotroph
## 1973 977
## Pathotroph-Symbiotroph Saprotroph
## 98 4805
## Saprotroph-Symbiotroph Symbiotroph
## 1179 636
## <NA>
## 9243
So approximately 44.8% are NAs
y3 <- left_join(rownames_to_column(y2), rownames_to_column(tax_table), by = "rowname")
row.names(y3) <- y3$rowname
y3 <- y3[, -1]
ps_FG <- phyloseq(otu_table(ps), tax_table(as.matrix(y3)), sample_data(ps))
ps_FG
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 10 taxonomic ranks ]
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
save(ps_FG, file = 'ps_FG_with_NAs')
20610 taxa; contains all the NAs
and remove also “|”
FG_tax_table <- as.data.frame(tax_table(ps_FG))
FG_tax_table$trophicMode <- gsub(" ", "", FG_tax_table$trophicMode, fixed = TRUE)
FG_tax_table$guild <- gsub("|", "", FG_tax_table$guild, fixed = TRUE)
I will add a column “FUNGuild” where I curate the some symbiotroph and pathotroph fungi according to my research interests. I am especially interested of AMF as thay are important mycorrhiza in arable soils, but I alo wasnt to separate Ectomycorrhiza as they are important in forest. This leaves one more relatively big symbiotrophic guild, endophytes, which I will also specify. So the following curation will be done for FUNGuild column:
FG_tax_table <- FG_tax_table %>%
mutate(FG = case_when(grepl("Ectomycorrhizal", guild) & trophicMode=="Symbiotroph" ~ "Ectomycorrhizal", grepl("Arbuscular", guild) ~ "Arbuscular Mycorrhizal", guild == "Endophyte" & trophicMode=="Symbiotroph" ~ "Endophyte", guild=="Plant Pathogen" & trophicMode=="Pathotroph"~ "Plant Pathogen"))
FG_tax_table <- FG_tax_table %>%
mutate(FUNGuild = case_when(FG == "Ectomycorrhizal" | FG == "Arbuscular Mycorrhizal" | FG == "Endophyte" | FG == "Plant Pathogen" ~ FG, guild != "Ectomycorrhizal" | FG != "Arbuscular Mycorrhizal" | FG != "Endophyte" | FG != "Plant Pathogen" ~ FG_tax_table$trophicMode))
# remove the FG column
FG_tax_table <- FG_tax_table[, c(1:10, 12)]
Check the different written forms, if I have empty spaces?
unique(FG_tax_table$trophicMode)
## [1] NA "Saprotroph-Symbiotroph"
## [3] "Symbiotroph" "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Saprotroph" "Pathotroph"
## [7] "Saprotroph" "Pathotroph-Symbiotroph"
## [9] "Pathotroph-Pathotroph-Saprotroph"
#unique(FG_tax_table$guild)
unique(FG_tax_table$FUNGuild)
## [1] NA "Saprotroph-Symbiotroph"
## [3] "Arbuscular Mycorrhizal" "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Saprotroph" "Pathotroph"
## [7] "Saprotroph" "Endophyte"
## [9] "Ectomycorrhizal" "Pathotroph-Symbiotroph"
## [11] "Plant Pathogen" "Symbiotroph"
## [13] "Pathotroph-Pathotroph-Saprotroph"
There is a bug in the FUNGuild data, so that in addition to Pathotroph-Saprotroph there is the “wrong” one Pathotroph-Pathotroph-Saprotroph etc. I will correct these.
Also I will name Symbiotroh into Other Symbiotroph as I have extracted the AMF, EcM etc. from the symbiotroph trophic mode
# change some names
FG_tax_table$trophicMode[FG_tax_table$trophicMode=="Pathotroph-Pathotroph-Saprotroph"] <- "Pathotroph-Saprotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Symbiotroph"] <- "Other Symbiotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Pathotroph"] <- "Other Pathotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Pathotroph-Pathotroph-Saprotroph"] <- "Pathotroph-Saprotroph"
# reorder
FG_tax_table <- FG_tax_table[, c(1:2, 11, 3:10)]
Check again
unique(FG_tax_table$trophicMode)
## [1] NA "Saprotroph-Symbiotroph"
## [3] "Symbiotroph" "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Saprotroph" "Pathotroph"
## [7] "Saprotroph" "Pathotroph-Symbiotroph"
#unique(FG_tax_table$guild)
unique(FG_tax_table$FUNGuild)
## [1] NA "Saprotroph-Symbiotroph"
## [3] "Arbuscular Mycorrhizal" "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Saprotroph" "Other Pathotroph"
## [7] "Saprotroph" "Endophyte"
## [9] "Ectomycorrhizal" "Pathotroph-Symbiotroph"
## [11] "Plant Pathogen" "Other Symbiotroph"
ps_FG <- phyloseq(otu_table(ps), tax_table(as.matrix(FG_tax_table)), sample_data(ps))
ps_FG
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 11 taxonomic ranks ]
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
save(ps_FG, file = 'ps_FG_with_NAs')
Check proportions of NAs
# lets calculate how big percentage of sequences where assigned (are not unknown)
ps_FG_RA <- transform(ps_FG, "compositional")
FG_RA_TmG <- aggregate_rare(ps_FG_RA, level = 'FUNGuild', detection = 0/100, prevalence = 0/100)
FG_RA_TmG
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 12 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 12 taxa by 2 taxonomic ranks ]
glom <- tax_glom(FG_RA_TmG, taxrank = 'FUNGuild')
percentages <- psmelt(glom)
df <- percentages %>%
group_by(OTU) %>%
summarise_at(vars(Abundance), list(name = mean))
df
## # A tibble: 12 × 2
## OTU name
## <chr> <dbl>
## 1 Arbuscular Mycorrhizal 0.0240
## 2 Ectomycorrhizal 0.0305
## 3 Endophyte 0.0206
## 4 Other Pathotroph 0.0323
## 5 Other Symbiotroph 0.000308
## 6 Pathotroph-Saprotroph 0.0850
## 7 Pathotroph-Saprotroph-Symbiotroph 0.0324
## 8 Pathotroph-Symbiotroph 0.00580
## 9 Plant Pathogen 0.0225
## 10 Saprotroph 0.292
## 11 Saprotroph-Symbiotroph 0.150
## 12 Unknown 0.305
Note, although 44.8% of OTUs were not assigned (info given above), 30.5% of reads were not assigned
Check also the percentage of NAs within the four sample types: forest, meadow, organic and conventional. Do they differ?
FG_RA_Tm <- aggregate_rare(ps_FG_RA, level = 'trophicMode', detection = 0/100, prevalence = 0/100)
glom <- tax_glom(FG_RA_Tm, taxrank = 'trophicMode')
percentages <- psmelt(glom)
df <- percentages %>%
group_by(sample_type, OTU) %>%
summarise_at(vars(Abundance), list(name = mean))
df
## # A tibble: 32 × 3
## # Groups: sample_type [4]
## sample_type OTU name
## <fct> <chr> <dbl>
## 1 forest Pathotroph 0.0104
## 2 forest Pathotroph-Saprotroph 0.0841
## 3 forest Pathotroph-Saprotroph-Symbiotroph 0.0315
## 4 forest Pathotroph-Symbiotroph 0.000181
## 5 forest Saprotroph 0.154
## 6 forest Saprotroph-Symbiotroph 0.176
## 7 forest Symbiotroph 0.261
## 8 forest Unknown 0.283
## 9 meadow Pathotroph 0.0266
## 10 meadow Pathotroph-Saprotroph 0.0568
## # ℹ 22 more rows
management NAs Forest 28.3 meadow 23.8 organic 31.8 conventional 35.9
# this is a robust way of removing NAs from the phyloseq object.
## aggregation makes NAs into "Unknown"
ps_FG_x <- aggregate_rare(ps_FG, level = 'guild', detection = 0, prevalence = 0)
ps_FG_x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 145 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 145 taxa by 2 taxonomic ranks ]
# 145 taxa
# and "Unknown" can be removed like this
allTaxa = taxa_names(ps_FG_x)
badTaxa = c("Unknown")
myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
ps_FG_x_pruned <- prune_taxa(myTaxa, ps_FG_x)
ps_FG_x_pruned
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 144 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 144 taxa by 2 taxonomic ranks ]
# 144 taxa
lets calculate how big percentage of sequences belongs to which trophic mode when non-assigned are not included
x <- aggregate_rare(ps_FG, level = 'trophicMode', detection = 0, prevalence = 0)
x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 8 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 8 taxa by 1 taxonomic ranks ]
# 8 taxa
# remove "Unknown"
allTaxa = taxa_names(x)
badTaxa = c("Unknown")
myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
x <- prune_taxa(myTaxa, x)
x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 7 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 7 taxa by 1 taxonomic ranks ]
# 7 taxa
x2 <- transform(x, 'compositional')
glom <- tax_glom(x2, taxrank = 'trophicMode')
percentages <- psmelt(glom)
df <- percentages %>%
group_by(OTU) %>%
summarise_at(vars(Abundance), list(name = mean))
df
## # A tibble: 7 × 2
## OTU name
## <chr> <dbl>
## 1 Pathotroph 0.0786
## 2 Pathotroph-Saprotroph 0.132
## 3 Pathotroph-Saprotroph-Symbiotroph 0.0453
## 4 Pathotroph-Symbiotroph 0.0105
## 5 Saprotroph 0.402
## 6 Saprotroph-Symbiotroph 0.218
## 7 Symbiotroph 0.114
Here, I will check the number of reads and OTUs and construct venn-diagrams for sample types (or management type) and soil layers.
library("ggpubr")
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
library("MicEco")
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
ps_RA <- microbiome::transform(ps, "compositional")
meta <- meta(ps)
summarize_phyloseq(ps)
## [[1]]
## [1] "1] Min. number of reads = 44"
##
## [[2]]
## [1] "2] Max. number of reads = 410851"
##
## [[3]]
## [1] "3] Total number of reads = 11539503"
##
## [[4]]
## [1] "4] Average number of reads = 82425.0214285714"
##
## [[5]]
## [1] "5] Median number of reads = 79653.5"
##
## [[6]]
## [1] "7] Sparsity = 0.928178415470992"
##
## [[7]]
## [1] "6] Any OTU sum to 1 or less? NO"
##
## [[8]]
## [1] "8] Number of singletons = 0"
##
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n (i.e. exactly one read detected across all samples)0"
##
## [[10]]
## [1] "10] Number of sample variables are: 28"
##
## [[11]]
## [1] "sampleID" "plot" "sampling_position"
## [4] "actual_sample_depth" "depth" "depth_numerical"
## [7] "vegetation" "sample_type" "root_mgg"
## [10] "pH_H2O" "EC_uScm" "C_g_per_kg"
## [13] "N_gkg" "TP_gkg" "Alox_mmolkg"
## [16] "Feox_mmolkg" "oxides_mmolkg" "PH2O_mgkg"
## [19] "Porg_mgkg" "DOC_mgkg" "Pinorg_mgkg"
## [22] "C_per_N" "observed" "chao1"
## [25] "shannon" "observed_sng" "chao1_sng"
## [28] "shannon_sng"
We obtained 11662127 fungal reads which clustered into 31714 OTUs in the 140 samples.
MyPalette <- c(forest = "#1167b1", meadow = "#fbc02d", organic = "#8a8a8a", conventional = "#b71c1c")
# Note, relative is false, because I am using an alredy relative abundance transformed phyloseq object.
venn_no_prev <- ps_venn(
ps_RA,
"sample_type",
fraction = 0,
weight = FALSE,
relative = FALSE,
plot = TRUE,
quantities = list(cex=0.7),
fill = MyPalette
)
venn_no_prev
How many OTUs are shared by all sample types? I will pick the shared by all from the figure and divide it with the total OTU number to get the percentage
shared_by_all <- 2570
total_OTUs <- 20610
shared_by_all/total_OTUs
## [1] 0.1246967
12.5% of OTUs were shared by all management
meta$new_depth <- meta$depth
meta$new_depth <- gsub("...", "-", meta$new_depth, fixed = TRUE)
meta$new_depth[meta$new_depth=="40-"] <- "40-80"
meta$new_depth[meta$new_depth=="0-10"] <- "0-10 cm"
meta$new_depth[meta$new_depth=="10-20"] <- "10-20 cm"
meta$new_depth[meta$new_depth=="20-30"] <- "20-30 cm"
meta$new_depth[meta$new_depth=="30-40"] <- "30-40 cm"
meta$new_depth[meta$new_depth=="40-80"] <- "40-80 cm"
sample_data(ps) <- sample_data(meta)
ps_RA <- transform(ps, "compositional")
# create your own color palette for sample types
MyPalette <- list(c('0-10 cm' = "#387212", '10-20 cm' = "#ADC476", '20-30 cm' = "#D8D2BA",'30-40 cm' = "#907852", '40-80 cm' = "#6A4C3A"))
venn_DEPTH <- ps_venn(
ps_RA,
"new_depth",
fraction = 0,
weight = FALSE,
relative = TRUE,
quantities = list(cex=0.7),
plot = TRUE,
fill = MyPalette[[1]]
)
venn_DEPTH
Again, how many OTUs are shared by all layers?
shared_by_all <- 1007
total_OTUs <- 20610
shared_by_all/total_OTUs
## [1] 0.04885978
4.9% of OTUs were shared by all layers
How much of the OTUs in the dataset were found in the first, first two or first three soil layers?
ps_x <- subset_samples(ps, depth!="30...40" & depth!="40...")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_x),
MARGIN = ifelse(taxa_are_rows(ps_x), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_x = prune_taxa((prev0 > 0), ps_x)
ps_x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20130 taxa and 84 samples ]
## sample_data() Sample Data: [ 84 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 20130 taxa by 7 taxonomic ranks ]
In the first three layers: 20130 OTUs
OTUs <- 20130
total_OTUs <- 20610
OTUs/total_OTUs
## [1] 0.9767103
97.7% of all OTUs were found in the first 3 soil layers
How much of the OTUs in the dataset were found in the first, first two or first three soil layers?
ps_x <- subset_samples(ps, depth!="20...30" & depth!="30...40" & depth!="40...")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_x),
MARGIN = ifelse(taxa_are_rows(ps_x), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_x = prune_taxa((prev0 > 0), ps_x)
ps_x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 18392 taxa and 56 samples ]
## sample_data() Sample Data: [ 56 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 18392 taxa by 7 taxonomic ranks ]
In the first three layers: 18392 OTUs
OTUs <- 18392
total_OTUs <- 20610
OTUs/total_OTUs
## [1] 0.8923823
89.2% of all OTUs were found in the first 2 soil layers
ps_x <- subset_samples(ps, depth=="0...10")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_x),
MARGIN = ifelse(taxa_are_rows(ps_x), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_x = prune_taxa((prev0 > 0), ps_x)
ps_x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 14737 taxa and 28 samples ]
## sample_data() Sample Data: [ 28 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 14737 taxa by 7 taxonomic ranks ]
OTUs <- 14737
total_OTUs <- 20610
OTUs/total_OTUs
## [1] 0.7150412
71.5% of all OTUs were found in the first soil layer
library(ggpubr)
fig <- ggarrange(venn_no_prev, venn_DEPTH,
ncol = 2, nrow = 1)
fig
library("ggpubr")
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
library(goeveg)
library(metagMisc)
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
meta <- meta(ps)
hist(meta$root_mgg)
#perform Shapiro-Wilk Test
shapiro.test(meta$root_mgg)
##
## Shapiro-Wilk normality test
##
## data: meta$root_mgg
## W = 0.35847, p-value < 2.2e-16
#log transformation
meta <- meta %>%
mutate(log_root = log10(root_mgg))
hist(meta$log_root)
#perform Shapiro-Wilk Test
shapiro.test(meta$log_root)
##
## Shapiro-Wilk normality test
##
## data: meta$log_root
## W = 0.98109, p-value = 0.04971
#now is normally distributed since p value is more than 0.05
sample_data(ps) <- sample_data(meta)
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
save(ps, file='ps_FINAL')
ps_RA <- microbiome::transform(ps, "compositional")
To visualize beta diversity, I will do a PCoA which is metric instead of e.g. non-metric NMDS
I will be following somewhat this tutorial:
OTU = as(otu_table(ps_RA), "matrix")
# transpose if necessary
if(taxa_are_rows(ps_RA)){OTU <- t(OTU)}
# Coerce to data.frame
OTU = as.data.frame(OTU)
OTU <- as.matrix(OTU)
bray_dist <- vegan::vegdist(OTU, method="bray")
str(bray_dist)
## 'dist' Named num [1:9730] 0.367 0.694 0.967 0.723 0.47 ...
## - attr(*, "maxdist")= num 1
## - attr(*, "Size")= int 140
## - attr(*, "Labels")= chr [1:140] "CG9.1_0to10" "CG9.1_10to20" "CG9.1_20to30" "CG9.1_30to40" ...
## - attr(*, "Diag")= logi FALSE
## - attr(*, "Upper")= logi FALSE
## - attr(*, "method")= chr "bray"
## - attr(*, "call")= language vegan::vegdist(x = OTU, method = "bray")
# use k = 3 so we'll get calculations for three axes
pcoa <- cmdscale(bray_dist, eig=TRUE, k = 3)
ordiplot(scores(pcoa),display="sites", type="points")
Let’s first make PCoA ordination with axes 1 and 2, and later for 1 and 3.
# Post-Hoc Projections of environmental variables
# envfit in vegan projects points onto vectors that have maximum correlation with corresponding environmental variables
pcoa.env12 <- envfit(pcoa, meta[ ,c("pH_H2O", "C_g_per_kg", "N_gkg", "TP_gkg", "sample_type", "depth_numerical", "DOC_mgkg", "Pinorg_mgkg", "Porg_mgkg", "log_root", "C_per_N", "Feox_mmolkg", "Alox_mmolkg")], na.rm = TRUE, choices=c(1:2), permutations = 999)
# main effects
pcoa.env12
##
## ***VECTORS
##
## Dim1 Dim2 r2 Pr(>r)
## pH_H2O 0.89803 -0.43993 0.4996 0.001 ***
## C_g_per_kg -0.92647 0.37636 0.5577 0.001 ***
## N_gkg -0.95072 0.31005 0.5692 0.001 ***
## TP_gkg -0.93787 -0.34699 0.5026 0.001 ***
## depth_numerical 0.98087 0.19465 0.5300 0.001 ***
## DOC_mgkg -0.76599 0.64286 0.4138 0.001 ***
## Pinorg_mgkg 0.17893 -0.98386 0.0740 0.003 **
## Porg_mgkg -0.99894 0.04605 0.5753 0.001 ***
## log_root -0.80130 0.59827 0.4884 0.001 ***
## C_per_N -0.97551 0.21996 0.4224 0.001 ***
## Feox_mmolkg -0.84349 0.53715 0.4468 0.001 ***
## Alox_mmolkg -0.67701 0.73598 0.3176 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
##
## ***FACTORS:
##
## Centroids:
## Dim1 Dim2
## sample_typeforest 0.0679 0.1867
## sample_typemeadow -0.0229 0.1780
## sample_typeorganic -0.0098 -0.1293
## sample_typeconventional 0.0006 -0.1050
##
## Goodness of fit:
## r2 Pr(>r)
## sample_type 0.2013 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
##
## 1 observation deleted due to missingness
ef12.adj <- pcoa.env12
pvals.adj <- p.adjust (pcoa.env12$vectors$pvals, method = 'bonferroni')
ef12.adj$vectors$pvals <- pvals.adj
ef12.adj
##
## ***VECTORS
##
## Dim1 Dim2 r2 Pr(>r)
## pH_H2O 0.89803 -0.43993 0.4996 0.012 *
## C_g_per_kg -0.92647 0.37636 0.5577 0.012 *
## N_gkg -0.95072 0.31005 0.5692 0.012 *
## TP_gkg -0.93787 -0.34699 0.5026 0.012 *
## depth_numerical 0.98087 0.19465 0.5300 0.012 *
## DOC_mgkg -0.76599 0.64286 0.4138 0.012 *
## Pinorg_mgkg 0.17893 -0.98386 0.0740 0.036 *
## Porg_mgkg -0.99894 0.04605 0.5753 0.012 *
## log_root -0.80130 0.59827 0.4884 0.012 *
## C_per_N -0.97551 0.21996 0.4224 0.012 *
## Feox_mmolkg -0.84349 0.53715 0.4468 0.012 *
## Alox_mmolkg -0.67701 0.73598 0.3176 0.012 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
##
## ***FACTORS:
##
## Centroids:
## Dim1 Dim2
## sample_typeforest 0.0679 0.1867
## sample_typemeadow -0.0229 0.1780
## sample_typeorganic -0.0098 -0.1293
## sample_typeconventional 0.0006 -0.1050
##
## Goodness of fit:
## r2 Pr(>r)
## sample_type 0.2013 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
##
## 1 observation deleted due to missingness
To plot (classical) mds (which is equivalent to PCoA) with ggplot a new datasheet needs to be created which contains the x,y points for each site. You can do this by calling the scores of you mds.
site.scrs <- as.data.frame(scores(pcoa, display = "sites")) #save pcoa results into dataframe
site.scrs <- cbind(site.scrs, soil_type = meta$sample_type) #add grouping variable "soil_type" to dataframe
site.scrs <- cbind(site.scrs, depth = meta$depth) #add grouping variable of depth
head(site.scrs)
## Dim1 Dim2 Dim3 soil_type depth
## CG9.1_0to10 -0.32486951 -0.16939677 0.057117223 conventional 0...10
## CG9.1_10to20 -0.35683666 -0.18451022 0.010039481 conventional 10...20
## CG9.1_20to30 0.10483906 -0.26374886 -0.239911662 conventional 20...30
## CG9.1_30to40 0.49569905 -0.18888465 -0.132639765 conventional 30...40
## CG9.1_40to70 0.03411974 -0.01592131 -0.009914955 conventional 40...
## CG9.2_0to10 -0.24690800 -0.12969297 0.145831464 conventional 0...10
I will be following somewhat this tutorial for fitting the environmental variables etc:
To show environmental extrinsic variables another datasheet needs to be created
Citation from the jkzorz github
“Extracting the required information from the envfit result is a bit more complicated. The envfit output contains information on the length of the segments for each variable. The segments are scaled to the r2 value, so that the environmental variables with a longer segment are more strongly correlated with the data than those with a shorter segment. You can extract this information with scores. Then these lengths are further scaled to fit the plot. This is done with a multiplier that is analysis specific, and can be accessed using the command ordiArrowMul(en). Below I multiply the scores by this multiplier to keep the coordinates in the correct proportion.”
Because my data contained continuous and categorical environmental variables, Im extracting the information from both separately using the vectors and factors options respectively.
# first categorial variables
env.scores_cat12 <- as.data.frame(scores(pcoa.env12, display = "factors"))*0.25 # multiply the envfit scores to keep them in the frame of the ordination
env.scores_cat12 <- cbind(env.scores_cat12, env.variables = rownames(env.scores_cat12)) #and then gives them their names
env.scores_cat12 <- cbind(env.scores_cat12, pval = pcoa.env12$factors$pvals) # add pvalues to dataframe
sig.env.scores_cat12 <- subset(env.scores_cat12, pval<=0.05) #subset data to show variables significant at 0.05
sig.env.scores_cat12
## Dim1 Dim2 env.variables pval
## sample_typeforest 0.0169631533 0.04667273 sample_typeforest 0.001
## sample_typemeadow -0.0057255852 0.04448833 sample_typemeadow 0.001
## sample_typeorganic -0.0024526659 -0.03233332 sample_typeorganic 0.001
## sample_typeconventional 0.0001453261 -0.02625329 sample_typeconventional 0.001
# all were significant
# then continous variables
env.scores_cont12 <- as.data.frame(scores(pcoa.env12, display = "vectors"))*0.25 # multiply the envfit scores to keep them in the frame of the ordination
env.scores_cont12 <- cbind(env.scores_cont12, env.variables = rownames(env.scores_cont12)) #and then gives them their names
env.scores_cont12 <- cbind(env.scores_cont12, pval = pcoa.env12$vectors$pvals) # add pvalues to dataframe
sig.env.scores_cont12 <- subset(env.scores_cont12, pval<=0.05) #subset data to show variables significant at 0.05
sig.env.scores_cont12
## Dim1 Dim2 env.variables pval
## pH_H2O 0.15868133 -0.077735176 pH_H2O 0.001
## C_g_per_kg -0.17297071 0.070265684 C_g_per_kg 0.001
## N_gkg -0.17931771 0.058479126 N_gkg 0.001
## TP_gkg -0.16622910 -0.061501047 TP_gkg 0.001
## depth_numerical 0.17852191 0.035426555 depth_numerical 0.001
## DOC_mgkg -0.12318024 0.103379224 DOC_mgkg 0.001
## Pinorg_mgkg 0.01216617 -0.066897767 Pinorg_mgkg 0.003
## Porg_mgkg -0.18942328 0.008732727 Porg_mgkg 0.001
## log_root -0.14000008 0.104527314 log_root 0.001
## C_per_N -0.15851066 0.035740927 C_per_N 0.001
## Feox_mmolkg -0.14095001 0.089759046 Feox_mmolkg 0.001
## Alox_mmolkg -0.09537901 0.103687400 Alox_mmolkg 0.001
# all were significant
A new dataset containing species data also needs to be made to look at species vectors.
# wascores computes Weighted Averages scores of species for ordination configuration or for environmental variables.
species.scores12 <- wascores(pcoa$points[,1:2], OTU)
species.scores13 <- wascores(pcoa$points[,1:3], OTU)
# Ordiselect gives me more control for the significant OTUs to display
# I'm using 0.1 % of the most abundant and 100 % of the best fitting OTUs
# NOTE! the higher the ablim maybe more of the low diversity sample' OTUs shown..?
# The species scores of the 0.1% most abundant and 100% of the best fitting OTUs
ordis12 <- ordiselect(OTU, species.scores12, ablim = 0.001, fitlim = 1, choices = c(1,2), method = "axes", env = pcoa.env12)
## [1] "21 species selected (0.1% of total number of species)."
## [1] "All species selected which belong to the 0.1% most abundant species."
ordis12.species.scores <- species.scores12[ordis12, ]
ordis12.species.scores <- cbind(ordis12.species.scores, Species = rownames(ordis12.species.scores)) #add species names to dataframe
# OTUs into species names
OTU.sp <- as.data.frame(tax_table(ps_RA))
OTU.sp$OTU <- rownames(OTU.sp)
# remove other taxa
OTU.sp <- OTU.sp[ , -(1:6)]
try <- merge(data.frame(ordis12.species.scores), data.frame(OTU.sp),
by = 0, all = FALSE)
#add row names
samp2 <- try[,-1]
rownames(samp2) <- try[,1]
try <- samp2
rm(samp2)
# remove the "Species"
try <- try[ , -(3)]
# remove "try"
ordis12.species.scores <- try
rm(try)
head(ordis12.species.scores)
## V1 V2 species
## OTU12776 0.126946387694976 -0.121543372900751 Clonostachys_rosea
## OTU139 0.200755588616414 0.046445493642642 Entomortierella_parvispora
## OTU13985 -0.237582025036352 0.0288145443505883 Saitozyma_podzolica
## OTU19296 -0.295916773006835 -0.149458330721953 Cladorrhinum_unclassified
## OTU20886 -0.25280082681134 -0.121756560222692 Paraphaeosphaeria_unclassified
## OTU23599 -0.22805912031261 0.169675633910535 Paraphaeosphaeria_viciae
## OTU
## OTU12776 OTU12776
## OTU139 OTU139
## OTU13985 OTU13985
## OTU19296 OTU19296
## OTU20886 OTU20886
## OTU23599 OTU23599
# make values numeric
ordis12.species.scores$V1 <- as.numeric(ordis12.species.scores$V1)
ordis12.species.scores$V2 <- as.numeric(ordis12.species.scores$V2)
Now we have the relevant information for plotting the ordination in ggplot
MyPalette <- c(forest = "#1167b1", meadow = "#fbc02d", organic = "#8a8a8a", conventional = "#b71c1c")
site.scrs$new_depth <- site.scrs$depth
site.scrs$new_depth <- gsub("...", "-", site.scrs$new_depth, fixed = TRUE)
site.scrs$new_depth[site.scrs$new_depth=="40-"] <- "40-80"
site.scrs$new_depth[site.scrs$new_depth=="0-10"] <- "0-10 cm"
site.scrs$new_depth[site.scrs$new_depth=="10-20"] <- "10-20 cm"
site.scrs$new_depth[site.scrs$new_depth=="20-30"] <- "20-30 cm"
site.scrs$new_depth[site.scrs$new_depth=="30-40"] <- "30-40 cm"
site.scrs$new_depth[site.scrs$new_depth=="40-80"] <- "40-80 cm"
#First lets run the PCoA with phyloseq to get the axis percentages (plot_ordination gives axis percentages)
GP.ord <- ordinate(ps_RA, "PCoA", "bray")
p2 = plot_ordination(ps_RA, GP.ord, type="samples", color="sample_type", shape="depth")
p2
Remember to change the axis percentages accordingly below!!!
pcoa.plot <- ggplot() + geom_point(data=site.scrs, aes(Dim1, Dim2, colour = factor(site.scrs$soil_type), shape = factor(site.scrs$new_depth)), size = 6, alpha = 0.6, stroke = 1.5) + theme_cowplot() + theme(panel.background = element_rect(fill = NA, colour = "black", size = 1, linetype = "solid")) + labs(colour = "", shape = "") + theme(legend.text = element_text(size = 12), axis.text = element_text(size = 16)) + scale_colour_manual(values = MyPalette) + labs(y = "PC2 (8.0%)", x = "PC1 (20.0%)")
pcoa.plot
First, modify the species names
unique(ordis12.species.scores$species)
## [1] "Clonostachys_rosea" "Entomortierella_parvispora"
## [3] "Saitozyma_podzolica" "Cladorrhinum_unclassified"
## [5] "Paraphaeosphaeria_unclassified" "Paraphaeosphaeria_viciae"
## [7] "Solicoccozyma_terricola" "Pseudeurotium_unclassified"
## [9] "Pseudeurotium_hygrophilum" "Clavulina_cinerea"
## [11] "Fusarium_asiaticum" "Pseudogymnoascus_unclassified"
## [13] "Pleotrichocladium_opacum" "Leotiomycetes_unclassified"
## [15] "Rhexocercosporidium_panacis" "Helotiales_unclassified"
## [17] "Pseudogymnoascus_roseus" "Solicoccozyma_terrea"
## [19] "Mortierella_antarctica" "Glutinoglossum_heptaseptatum"
I want to remove the “unclassified” from the end
ordis12.species.scores$species <- gsub("_unclassified","",as.character(ordis12.species.scores$species))
pcoa.plot_OTU <- pcoa.plot + ggrepel::geom_text_repel(data = ordis12.species.scores, aes(x=V1,y=V2,label=species), alpha=0.7, cex = 3.5, direction = "both", segment.size = 0.2, max.overlaps = Inf) + theme(legend.position = c(0.88, 0.8), legend.text = element_text(size = 12))
pcoa.plot_OTU
First, simplify the names
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "pH_H2O"] <- "pH"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "C_g_per_kg"] <- "C"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "N_gkg"] <- "N"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "TP_gkg"] <- "P-tot"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "depth_numerical"] <- "depth"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "DOC_mgkg"] <- "DOC"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "Pinorg_mgkg"] <- "P-inorg"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "Porg_mgkg"] <- "P-org"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "log_root"] <- "log root"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "C_per_N"] <- "C/N"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "Feox_mmolkg"] <- "Fe-ox"
sig.env.scores_cont12$env.variables[sig.env.scores_cont12$env.variables == "Alox_mmolkg"] <- "Al-ox"
…then plot
p1 <- pcoa.plot_OTU +
geom_segment(aes(x = 0, y = 0, xend = Dim1, yend = Dim2), data = sig.env.scores_cont12, size =1, alpha = 0.5, colour = "grey30") + ggrepel::geom_text_repel(data = sig.env.scores_cont12, aes(x = Dim1, y = Dim2), colour = "blue", fontface = "bold", label = sig.env.scores_cont12$env.variables, segment.size = 0.2, box.padding = unit(0.1, "lines"), point.padding = (0.1), force = 1, max.time = 30, nudge_y = 0.00, nudge_x = 0.00)
p1
This was saved with width 1200 and height 900
pcoa.env13 <- envfit(pcoa, meta[ ,c("pH_H2O", "C_g_per_kg", "N_gkg", "TP_gkg", "sample_type", "depth_numerical", "DOC_mgkg", "Pinorg_mgkg", "Porg_mgkg", "log_root", "C_per_N", "Feox_mmolkg", "Alox_mmolkg")], na.rm = TRUE, choices=c(1:3), permutations = 999)
pcoa.env13
##
## ***VECTORS
##
## Dim1 Dim2 Dim3 r2 Pr(>r)
## pH_H2O 0.71582 -0.34967 0.60442 0.5947 0.001 ***
## C_g_per_kg -0.70297 0.28453 -0.65182 0.6894 0.001 ***
## N_gkg -0.74553 0.24218 -0.62092 0.6801 0.001 ***
## TP_gkg -0.91468 -0.33863 -0.22064 0.5108 0.001 ***
## depth_numerical 0.82942 0.16519 0.53364 0.5935 0.001 ***
## DOC_mgkg -0.57514 0.48141 -0.66141 0.5386 0.001 ***
## Pinorg_mgkg 0.12414 -0.67859 0.72395 0.1308 0.001 ***
## Porg_mgkg -0.84201 0.03811 -0.53812 0.6442 0.001 ***
## log_root -0.70696 0.52696 -0.47172 0.5404 0.001 ***
## C_per_N -0.78189 0.17543 -0.59823 0.4935 0.001 ***
## Feox_mmolkg -0.67315 0.42760 -0.60335 0.5371 0.001 ***
## Alox_mmolkg -0.40124 0.43448 -0.80637 0.5726 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
##
## ***FACTORS:
##
## Centroids:
## Dim1 Dim2 Dim3
## sample_typeforest 0.0679 0.1867 0.1190
## sample_typemeadow -0.0229 0.1780 -0.0916
## sample_typeorganic -0.0098 -0.1293 0.0392
## sample_typeconventional 0.0006 -0.1050 0.0041
##
## Goodness of fit:
## r2 Pr(>r)
## sample_type 0.1997 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
##
## 1 observation deleted due to missingness
ef13.adj <- pcoa.env13
pvals.adj <- p.adjust (pcoa.env13$vectors$pvals, method = 'bonferroni')
ef13.adj$vectors$pvals <- pvals.adj
ef13.adj
##
## ***VECTORS
##
## Dim1 Dim2 Dim3 r2 Pr(>r)
## pH_H2O 0.71582 -0.34967 0.60442 0.5947 0.012 *
## C_g_per_kg -0.70297 0.28453 -0.65182 0.6894 0.012 *
## N_gkg -0.74553 0.24218 -0.62092 0.6801 0.012 *
## TP_gkg -0.91468 -0.33863 -0.22064 0.5108 0.012 *
## depth_numerical 0.82942 0.16519 0.53364 0.5935 0.012 *
## DOC_mgkg -0.57514 0.48141 -0.66141 0.5386 0.012 *
## Pinorg_mgkg 0.12414 -0.67859 0.72395 0.1308 0.012 *
## Porg_mgkg -0.84201 0.03811 -0.53812 0.6442 0.012 *
## log_root -0.70696 0.52696 -0.47172 0.5404 0.012 *
## C_per_N -0.78189 0.17543 -0.59823 0.4935 0.012 *
## Feox_mmolkg -0.67315 0.42760 -0.60335 0.5371 0.012 *
## Alox_mmolkg -0.40124 0.43448 -0.80637 0.5726 0.012 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
##
## ***FACTORS:
##
## Centroids:
## Dim1 Dim2 Dim3
## sample_typeforest 0.0679 0.1867 0.1190
## sample_typemeadow -0.0229 0.1780 -0.0916
## sample_typeorganic -0.0098 -0.1293 0.0392
## sample_typeconventional 0.0006 -0.1050 0.0041
##
## Goodness of fit:
## r2 Pr(>r)
## sample_type 0.1997 0.001 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
##
## 1 observation deleted due to missingness
# first categorial variables
env.scores_cat13 <- as.data.frame(scores(pcoa.env13, display = "factors"))*0.25
env.scores_cat13 <- cbind(env.scores_cat13, env.variables = rownames(env.scores_cat13)) #and then gives them their names
env.scores_cat13 <- cbind(env.scores_cat13, pval = pcoa.env13$factors$pvals) # add pvalues to dataframe
sig.env.scores_cat13 <- subset(env.scores_cat13, pval<=0.05) #subset data to show variables significant at 0.05
# then continous variables
env.scores_cont13 <- as.data.frame(scores(pcoa.env13, display = "vectors"))*0.25
env.scores_cont13 <- cbind(env.scores_cont13, env.variables = rownames(env.scores_cont13)) #and then gives them their names
env.scores_cont13 <- cbind(env.scores_cont13, pval = pcoa.env13$vectors$pvals) # add pvalues to dataframe
sig.env.scores_cont13 <- subset(env.scores_cont13, pval<=0.05) #subset data to show variables significant at 0.05
# The species scores of the 0.1% most abundant and 100% of the best fitting OTUs
ordis13 <- ordiselect(OTU, species.scores13, ablim = 0.001, fitlim = 1, choices = c(1,3), method = "axes", env = pcoa.env13)
## [1] "21 species selected (0.1% of total number of species)."
## [1] "All species selected which belong to the 0.1% most abundant species."
ordis13.species.scores <- species.scores13[ordis13, ]
ordis13.species.scores <- cbind(ordis13.species.scores, Species = rownames(ordis13.species.scores)) #add species names to dataframe
# change into sp names
try <- merge(data.frame(ordis13.species.scores), data.frame(OTU.sp),
by = 0, all = FALSE)
#add row names
samp2 <- try[,-1]
rownames(samp2) <- try[,1]
try <- samp2
rm(samp2)
# remove the "Species"
try <- try[ , -c(2, 4)]
# remove "try"
ordis13.species.scores <- try
rm(try)
head(ordis13.species.scores)
## V1 V3 species
## OTU12776 0.126946387694976 -0.00534028987033588 Clonostachys_rosea
## OTU139 0.200755588616414 -0.0370071455091096 Entomortierella_parvispora
## OTU13985 -0.237582025036352 -0.0956487846132982 Saitozyma_podzolica
## OTU19296 -0.295916773006835 0.120579023660146 Cladorrhinum_unclassified
## OTU20886 -0.25280082681134 0.026574191820344 Paraphaeosphaeria_unclassified
## OTU23599 -0.22805912031261 -0.175344391490801 Paraphaeosphaeria_viciae
## OTU
## OTU12776 OTU12776
## OTU139 OTU139
## OTU13985 OTU13985
## OTU19296 OTU19296
## OTU20886 OTU20886
## OTU23599 OTU23599
ordis13.species.scores$V1 <- as.numeric(ordis13.species.scores$V1)
ordis13.species.scores$V3 <- as.numeric(ordis13.species.scores$V3)
#First lets run the PCoA in with phyloseq to get the axis percentages
GP.ord <- ordinate(ps_RA, "PCoA", "bray", k = 3)
p2 = plot_ordination(ps_RA, GP.ord, type="samples", axes = c(1, 3), color="sample_type", shape="depth")
p2
Remember to change the axis percentages accordingly!!
pcoa.plot <- ggplot() + geom_point(data=site.scrs, aes(Dim1, Dim3, colour = factor(site.scrs$soil_type), shape = factor(site.scrs$depth)), size = 6, alpha = 0.6, stroke = 1.5) + theme_cowplot() + theme(panel.background = element_rect(fill = NA, colour = "black", size = 1, linetype = "solid")) + labs(colour = "", shape = "") + theme(legend.position = "right", legend.text = element_text(size = 12), axis.text = element_text(size = 16)) + scale_colour_manual(values = MyPalette) + labs(y = "PC3 (5.8%)", x = "PC1 (20.0%)")
pcoa.plot
# modify the species names
ordis13.species.scores$species <- gsub("_unclassified","",as.character(ordis13.species.scores$species))
pcoa.plot_OTU <- pcoa.plot + ggrepel::geom_text_repel(data = ordis13.species.scores, aes(x=V1,y=V3,label=species), alpha=0.7, cex = 3.5, direction = "both", segment.size = 0.2, max.overlaps = Inf) + theme(legend.text = element_text(size = 12)) + theme(legend.position = c(0.85, 0.8), legend.text = element_text(size = 12))# + theme(legend.position="none")# if problems, this might help
pcoa.plot_OTU
I will not add env. variables here. Just a simple PCoA
pcoa.plot_OTU +
geom_segment(aes(x = 0, y = 0, xend = Dim1, yend = Dim3), data = sig.env.scores_cont13, size =1, alpha = 0.5, colour = "grey30") + ggrepel::geom_text_repel(data = sig.env.scores_cont13, aes(x = Dim1, y = Dim3), colour = "blue", fontface = "bold", label = sig.env.scores_cont13$env.variables, max.overlaps=Inf, direction = "y", segment.size = 0.2, box.padding = unit(0.5, "lines"), point.padding = (1), force = 1, max.time = 30, nudge_y = 0.01, nudge_x = 0.01) + theme(legend.position = c(0.91, 0.8), legend.text = element_text(size = 12)) #+ theme(legend.position = "none")
Here I do permutational analysis of variance or PERMANOVA. With PERMANOVA, I want to check how much the main treatment factors, management type (here sample_type) and soil layer (depth), are responsible for differences in fungal communities. In addition, I will check how soil layers differ within management type (4.5) and in which soil layers we see a management type effect (4.6)
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library("pairwiseAdonis")
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
# Pick relative abundances (compositional) and sample metadata
ps_RA <- microbiome::transform(ps, "compositional")
otu <- abundances(ps_RA)
meta <- meta(ps_RA)
ps_RA_bray <- phyloseq::distance(ps_RA, method = "bray")
Check that variance homogeneity assumptions hold (to ensure the reliability of the results). If groups have signif. different spreads the permanova result may be potentially explained by that, rtaher than the groups.
Betadisper first calculates the average distance of group members to the group centroid in multivariate space (generated by a distance matrix). Then, an ANOVA is done to test if the dispersions (variances) of groups are different.
anova(betadisper(ps_RA_bray, meta$sample_type))
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 3 0.05437 0.018125 1.247 0.2953
## Residuals 136 1.97679 0.014535
We see that the ANOVA p-value is not significant meaning that the homogeneity of variance assumption is met
anova(betadisper(ps_RA_bray, meta$depth))
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 4 0.41023 0.102556 9.276 1.184e-06 ***
## Residuals 135 1.49258 0.011056
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
We see that the ANOVA p-value is highly significant meaning that homogeneity of variance assumption is NOT met
I’ll do post hoc analysis with Tukey’s test to see which groups differ in relation to their variances
TukeyHSD(betadisper(ps_RA_bray, meta$depth))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## 10...20-0...10 -0.028389112 -0.106091062 0.04931284 0.8502993
## 20...30-0...10 0.041806442 -0.035895508 0.11950839 0.5722606
## 30...40-0...10 0.051717780 -0.025984170 0.12941973 0.3549246
## 40...-0...10 0.130756030 0.053054080 0.20845798 0.0000749
## 20...30-10...20 0.070195554 -0.007506396 0.14789750 0.0971647
## 30...40-10...20 0.080106892 0.002404942 0.15780884 0.0397902
## 40...-10...20 0.159145142 0.081443192 0.23684709 0.0000009
## 30...40-20...30 0.009911338 -0.067790612 0.08761329 0.9966478
## 40...-20...30 0.088949588 0.011247638 0.16665154 0.0161603
## 40...-30...40 0.079038250 0.001336300 0.15674020 0.0440806
Dispersions differ significantly between 40… and all other, and between 30…40 and 10…20
The latter is not a problem at all, because I am not interested of comparing layers if they are not consecutive, but I will keep in mind, that the consecutive layers 30-40 cmd and 40-80 cm do not have similar dispersions.
First, I will do PERMANOVA so that I include all management types (later without forest)
# first with just soil type and strata option
adonis2(formula = ps_RA_bray~ sample_type, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$depth)
## Permutation test for adonis under reduced model
## Marginal effects of terms
## Blocks: strata
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ sample_type, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$depth)
## Df SumOfSqs R2 F Pr(>F)
## sample_type 3 5.591 0.10399 5.2615 1e-04 ***
## Residual 136 48.175 0.89601
## Total 139 53.766 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# then with just depth and strata option
adonis2(formula = ps_RA_bray~ depth, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$sample_type)
## Permutation test for adonis under reduced model
## Marginal effects of terms
## Blocks: strata
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ depth, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$sample_type)
## Df SumOfSqs R2 F Pr(>F)
## depth 4 9.705 0.1805 7.4335 1e-04 ***
## Residual 135 44.061 0.8195
## Total 139 53.766 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Depth has a larger effect. So, let’s put it first in the model
For the full model it matters which “by” option we choose. When by=“terms” significance for each term is calculated sequentially from first to last, so that the order of terms matter. We will use this, because with sequential analysis we will get R2 values that sum up to 1, and will also get the significance and R2 values for each interaction term separately rather than for the interaction alone.
final <- adonis2(formula = ps_RA_bray ~ depth * sample_type, data = meta, permutations = 9999, method = "bray", by = "terms")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ depth * sample_type, data = meta, permutations = 9999, method = "bray", by = "terms")
## Df SumOfSqs R2 F Pr(>F)
## depth 4 9.705 0.18050 9.1871 1e-04 ***
## sample_type 3 5.591 0.10399 7.0576 1e-04 ***
## depth:sample_type 12 6.780 0.12611 2.1396 1e-04 ***
## Residual 120 31.690 0.58940
## Total 139 53.766 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
write.csv2(final, "\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS\\permanova_soiltype_and_depth.csv")
I will not use this, rather the one above with forest
# subset samples
ps_RA <- microbiome::transform(ps, "compositional")
ps_x <- subset_samples(ps_RA, sample_type != "forest")
meta_subset <- meta(ps_x)
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_x),
MARGIN = ifelse(taxa_are_rows(ps_x), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_x = prune_taxa((prev0 > 0), ps_x)
ps_x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 19820 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 19820 taxa by 7 taxonomic ranks ]
b <- phyloseq::distance(ps_x, method = "bray")
set.seed(777)
final <- adonis2(formula = b ~ depth*sample_type, data = meta_subset, permutations = 9999, method = "bray", by = "terms")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = b ~ depth * sample_type, data = meta_subset, permutations = 9999, method = "bray", by = "terms")
## Df SumOfSqs R2 F Pr(>F)
## depth 4 9.800 0.21235 9.7433 1e-04 ***
## sample_type 2 3.927 0.08509 7.8083 1e-04 ***
## depth:sample_type 8 4.763 0.10320 2.3676 1e-04 ***
## Residual 110 27.660 0.59936
## Total 124 46.149 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Pairwise PERMANOVA I will only do so that forest is excluded due to too few replicates for forest. But I will not use these result, this is just to check
set.seed(777)
pair.mod<-pairwise.adonis(b, factors=meta_subset$sample_type)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 conventional vs meadow 1 2.351620 6.722317 0.07492358 0.001 0.003
## 2 conventional vs organic 1 1.000537 2.947677 0.03429618 0.010 0.030
## 3 meadow vs organic 1 2.574066 7.371501 0.08634616 0.001 0.003
## sig
## 1 *
## 2 .
## 3 *
I will do pairwise permanova analysis of depth for all management types separately, except for forest which has too few replicates
# subset samples
x <- "meadow"
ps_RA_subset <- subset_samples(ps_RA, sample_type == x)
meta_subset <- subset(meta, sample_type == x)
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 11032 taxa and 40 samples ]
## sample_data() Sample Data: [ 40 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 11032 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")
set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$depth)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
## 1 0...10 vs 10...20 1 0.6686591 4.418415 0.2398912 0.001 0.01 *
## 2 0...10 vs 20...30 1 1.2787641 6.259368 0.3089617 0.001 0.01 *
## 3 0...10 vs 30...40 1 2.1571845 9.795664 0.4116575 0.001 0.01 *
## 4 0...10 vs 40... 1 1.6755335 5.447545 0.2801148 0.002 0.02 .
## 5 10...20 vs 20...30 1 0.3728923 1.802396 0.1140584 0.048 0.48
## 6 10...20 vs 30...40 1 1.6735250 7.511019 0.3491708 0.001 0.01 *
## 7 10...20 vs 40... 1 1.5401662 4.965605 0.2618216 0.002 0.02 .
## 8 20...30 vs 30...40 1 0.9160107 3.321638 0.1917624 0.001 0.01 *
## 9 20...30 vs 40... 1 1.0302087 2.837037 0.1684998 0.001 0.01 *
## 10 30...40 vs 40... 1 0.7004325 1.847860 0.1166000 0.012 0.12
# subset samples
x <- "organic"
ps_RA_subset <- subset_samples(ps_RA, sample_type == x)
meta_subset <- subset(meta, sample_type == x)
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 14151 taxa and 40 samples ]
## sample_data() Sample Data: [ 40 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 14151 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")
set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$depth)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
## 1 0...10 vs 10...20 1 0.1366228 0.906114 0.06078808 0.443 1.00
## 2 0...10 vs 20...30 1 0.4849847 2.278740 0.13998260 0.007 0.07
## 3 0...10 vs 30...40 1 2.0284059 8.323851 0.37286806 0.001 0.01 *
## 4 0...10 vs 40... 1 2.3642953 11.171511 0.44381567 0.002 0.02 .
## 5 10...20 vs 20...30 1 0.4214687 2.119591 0.13149163 0.016 0.16
## 6 10...20 vs 30...40 1 2.1185424 9.223078 0.39715141 0.001 0.01 *
## 7 10...20 vs 40... 1 2.4397574 12.343810 0.46856585 0.002 0.02 .
## 8 20...30 vs 30...40 1 1.0503864 3.600278 0.20455802 0.001 0.01 *
## 9 20...30 vs 40... 1 1.4017114 5.397392 0.27825350 0.001 0.01 *
## 10 30...40 vs 40... 1 0.5095377 1.753656 0.11131738 0.041 0.41
# subset samples
x <- "conventional"
ps_RA_subset <- subset_samples(ps_RA, sample_type == x)
meta_subset <- subset(meta, sample_type == x)
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 13863 taxa and 45 samples ]
## sample_data() Sample Data: [ 45 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 13863 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")
set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$depth)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
## 1 0...10 vs 10...20 1 0.1170223 0.8087735 0.04811615 0.757 1.00
## 2 0...10 vs 20...30 1 0.7762987 3.5720248 0.18250665 0.003 0.03 .
## 3 0...10 vs 30...40 1 2.1200130 8.7764519 0.35422553 0.001 0.01 *
## 4 0...10 vs 40... 1 1.6494331 5.3721690 0.25136283 0.001 0.01 *
## 5 10...20 vs 20...30 1 0.7642509 4.0496264 0.20198014 0.007 0.07
## 6 10...20 vs 30...40 1 2.2368859 10.5042323 0.39632283 0.001 0.01 *
## 7 10...20 vs 40... 1 1.8451677 6.6271158 0.29288381 0.001 0.01 *
## 8 20...30 vs 30...40 1 0.6735640 2.3585234 0.12847021 0.025 0.25
## 9 20...30 vs 40... 1 0.7552291 2.1512623 0.11851861 0.013 0.13
## 10 30...40 vs 40... 1 0.4996687 1.3314104 0.07682066 0.105 1.00
I will analyse these without forest as forest has too few replicates
# define soil layer to be analysed
x <- "0...10"
# subset samples
ps_RA_subset <- subset_samples(ps_RA, sample_type != "forest")
ps_RA_subset <- subset_samples(ps_RA_subset, depth == x)
meta_subset <- subset(meta, depth == x & sample_type != "forest")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 13638 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 13638 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")
set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$sample_type)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 conventional vs meadow 1 1.5452073 9.547816 0.3889477 0.001 0.003
## 2 conventional vs organic 1 0.7360756 4.347365 0.2247006 0.001 0.003
## 3 meadow vs organic 1 1.5542597 9.915270 0.4146000 0.002 0.006
## sig
## 1 *
## 2 *
## 3 *
# subset samples
x <- "10...20"
# subset samples
ps_RA_subset <- subset_samples(ps_RA, sample_type != "forest")
ps_RA_subset <- subset_samples(ps_RA_subset, depth == x)
meta_subset <- subset(meta, depth == x & sample_type != "forest")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 15128 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 15128 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")
set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$sample_type)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 conventional vs meadow 1 1.3202292 9.871317 0.3968956 0.001 0.003
## 2 conventional vs organic 1 0.6464922 5.141142 0.2552557 0.001 0.003
## 3 meadow vs organic 1 1.3555168 9.325285 0.3997930 0.002 0.006
## sig
## 1 *
## 2 *
## 3 *
# subset samples
x <- "20...30"
# subset samples
ps_RA_subset <- subset_samples(ps_RA, sample_type != "forest")
ps_RA_subset <- subset_samples(ps_RA_subset, depth == x)
meta_subset <- subset(meta, depth == x & sample_type != "forest")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 14304 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 14304 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")
set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$sample_type)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 conventional vs meadow 1 0.8814463 3.381681 0.1839702 0.001 0.003
## 2 conventional vs organic 1 0.4094517 1.567927 0.0946363 0.114 0.342
## 3 meadow vs organic 1 0.8348207 3.206261 0.1863427 0.002 0.006
## sig
## 1 *
## 2
## 3 *
# subset samples
x <- "30...40"
# subset samples
ps_RA_subset <- subset_samples(ps_RA, sample_type != "forest")
ps_RA_subset <- subset_samples(ps_RA_subset, depth == x)
meta_subset <- subset(meta, depth == x & sample_type != "forest")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 5256 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 5256 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")
set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$sample_type)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 conventional vs meadow 1 0.7196829 2.3881246 0.13734228 0.006 0.018
## 2 conventional vs organic 1 0.2610497 0.8266673 0.05223256 0.707 1.000
## 3 meadow vs organic 1 0.5459411 1.7774404 0.11265708 0.012 0.036
## sig
## 1 .
## 2
## 3 .
# subset samples
x <- "40..."
# subset samples
ps_RA_subset <- subset_samples(ps_RA, sample_type != "forest")
ps_RA_subset <- subset_samples(ps_RA_subset, depth == x)
meta_subset <- subset(meta, depth == x & sample_type != "forest")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_subset),
MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 2487 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 2487 taxa by 7 taxonomic ranks ]
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")
set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$sample_type)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 conventional vs meadow 1 0.4663823 1.030147 0.06426309 0.353 1.000
## 2 conventional vs organic 1 0.7337456 2.062743 0.12089163 0.011 0.033
## 3 meadow vs organic 1 1.0846039 2.992358 0.17610025 0.002 0.006
## sig
## 1
## 2 .
## 3 *
PERMANOVA with soil properties will be done with only meadow, organic and conventional management types excluding forest
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library("pairwiseAdonis")
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
# Pick relative abundances (compositional) and sample metadata
ps_RA <- microbiome::transform(ps, "compositional")
ps_RA_nf <- subset_samples(ps_RA, sample_type!="forest")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_nf),
MARGIN = ifelse(taxa_are_rows(ps_RA_nf), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_RA_nf = prune_taxa((prev0 > 0), ps_RA_nf)
ps_RA_nf
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 19820 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 19820 taxa by 7 taxonomic ranks ]
otu <- abundances(ps_RA_nf)
meta <- meta(ps_RA_nf)
Note: adonis cannot handle or account for NA or blanks in your data. Use na.omit(meta) %>% before each run where such variables are used where NAs exist
ps_RA_bray <- phyloseq::distance(ps_RA_nf, method = "bray")
I will use the following soil properties
“log_root”
“pH_H2O”
“C_g_per_kg”
“N_gkg”
“TP_gkg”
“Alox_mmolkg”
“Feox_mmolkg”
“PH2O_mgkg”
“Porg_mgkg”
“DOC_mgkg”
“Pinorg_mgkg”
“C_per_N”
final <- adonis2(formula = ps_RA_bray ~ log_root, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ log_root, data = meta, permutations = 9999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## log_root 1 5.369 0.11634 16.194 1e-04 ***
## Residual 123 40.780 0.88366
## Total 124 46.149 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ pH_H2O, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ pH_H2O, data = meta, permutations = 9999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## pH_H2O 1 6.041 0.1309 18.526 1e-04 ***
## Residual 123 40.108 0.8691
## Total 124 46.149 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ C_g_per_kg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ C_g_per_kg, data = meta, permutations = 9999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## C_g_per_kg 1 6.482 0.14046 20.1 1e-04 ***
## Residual 123 39.667 0.85954
## Total 124 46.149 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ N_gkg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ N_gkg, data = meta, permutations = 9999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## N_gkg 1 6.647 0.14404 20.698 1e-04 ***
## Residual 123 39.502 0.85596
## Total 124 46.149 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ TP_gkg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ TP_gkg, data = meta, permutations = 9999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## TP_gkg 1 5.559 0.12046 16.846 1e-04 ***
## Residual 123 40.590 0.87954
## Total 124 46.149 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ Alox_mmolkg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ Alox_mmolkg, data = meta, permutations = 9999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Alox_mmolkg 1 3.713 0.08045 10.762 1e-04 ***
## Residual 123 42.437 0.91955
## Total 124 46.149 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ Feox_mmolkg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ Feox_mmolkg, data = meta, permutations = 9999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Feox_mmolkg 1 4.853 0.10517 14.456 1e-04 ***
## Residual 123 41.296 0.89483
## Total 124 46.149 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ DOC_mgkg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ DOC_mgkg, data = meta, permutations = 9999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## DOC_mgkg 1 5.300 0.11484 15.957 1e-04 ***
## Residual 123 40.850 0.88516
## Total 124 46.149 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ Pinorg_mgkg, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ Pinorg_mgkg, data = meta, permutations = 9999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## Pinorg_mgkg 1 0.699 0.01515 1.8918 0.0305 *
## Residual 123 45.450 0.98485
## Total 124 46.149 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
final <- adonis2(formula = ps_RA_bray ~ C_per_N, data = meta, permutations = 9999, method = "bray")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ C_per_N, data = meta, permutations = 9999, method = "bray")
## Df SumOfSqs R2 F Pr(>F)
## C_per_N 1 4.909 0.10637 14.641 1e-04 ***
## Residual 123 41.240 0.89363
## Total 124 46.149 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
colSums(is.na(meta))
## sampleID plot sampling_position actual_sample_depth
## 0 0 0 0
## depth depth_numerical vegetation sample_type
## 0 0 0 0
## root_mgg pH_H2O EC_uScm C_g_per_kg
## 0 0 0 0
## N_gkg TP_gkg Alox_mmolkg Feox_mmolkg
## 0 0 0 0
## oxides_mmolkg PH2O_mgkg Porg_mgkg DOC_mgkg
## 0 1 1 0
## Pinorg_mgkg C_per_N observed chao1
## 0 0 0 0
## shannon observed_sng chao1_sng shannon_sng
## 0 0 0 0
## log_root
## 0
These are NA:
Porg_mgkg for sample NG2A2_30to40
PH2O_mgkg for sample NG2B3_0to10
m <- meta %>% drop_na(Porg_mgkg)
x <- subset_samples(ps_RA_nf, sampleID!="NG2A2_30to40")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(x),
MARGIN = ifelse(taxa_are_rows(x), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
x = prune_taxa((prev0 > 0), x)
x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 19817 taxa and 124 samples ]
## sample_data() Sample Data: [ 124 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 19817 taxa by 7 taxonomic ranks ]
otu <- abundances(x)
b <- phyloseq::distance(x, method = "bray")
final <- adonis2(formula = b ~ Porg_mgkg, data = m, permutations = 9999, method = "bray", by = "terms")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = b ~ Porg_mgkg, data = m, permutations = 9999, method = "bray", by = "terms")
## Df SumOfSqs R2 F Pr(>F)
## Porg_mgkg 1 6.645 0.14528 20.736 1e-04 ***
## Residual 122 39.095 0.85472
## Total 123 45.739 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m <- meta %>% drop_na(PH2O_mgkg)
x <- subset_samples(ps_RA_nf, sampleID!="NG2B3_0to10")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(x),
MARGIN = ifelse(taxa_are_rows(x), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
x = prune_taxa((prev0 > 0), x)
x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 19819 taxa and 124 samples ]
## sample_data() Sample Data: [ 124 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 19819 taxa by 7 taxonomic ranks ]
otu <- abundances(x)
b <- phyloseq::distance(x, method = "bray")
final <- adonis2(formula = b ~ PH2O_mgkg, data = m, permutations = 9999, method = "bray", by = "terms")
final
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = b ~ PH2O_mgkg, data = m, permutations = 9999, method = "bray", by = "terms")
## Df SumOfSqs R2 F Pr(>F)
## PH2O_mgkg 1 1.935 0.04229 5.3866 1e-04 ***
## Residual 122 43.824 0.95771
## Total 123 45.759 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
remove these samples
Porg_mgkg (NG2A2_30to40) PH2O_mgkg (NG2B3_0to10)
m <- meta %>% drop_na(PH2O_mgkg)
m <- m %>% drop_na(Porg_mgkg)
ps_x <- subset_samples(ps_RA_nf, sampleID!="NG2B3_0to10")
ps_x <- subset_samples(ps_x, sampleID!="NG2A2_30to40")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_x),
MARGIN = ifelse(taxa_are_rows(ps_x), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_x = prune_taxa((prev0 > 0), ps_x)
ps_x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 19816 taxa and 123 samples ]
## sample_data() Sample Data: [ 123 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 19816 taxa by 7 taxonomic ranks ]
b <- phyloseq::distance(ps_x, method = "bray")
final <- adonis2(formula = b ~ log_root + pH_H2O + C_g_per_kg + N_gkg + TP_gkg + Alox_mmolkg + Feox_mmolkg + PH2O_mgkg + Porg_mgkg + DOC_mgkg + Pinorg_mgkg + C_per_N, data = m, permutations = 9999, method = "bray", by = NULL)
final
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = b ~ log_root + pH_H2O + C_g_per_kg + N_gkg + TP_gkg + Alox_mmolkg + Feox_mmolkg + PH2O_mgkg + Porg_mgkg + DOC_mgkg + Pinorg_mgkg + C_per_N, data = m, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 12 15.333 0.33811 4.6826 1e-04 ***
## Residual 110 30.016 0.66189
## Total 122 45.350 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
run for all layers and all of these separately:
# Define the environmental variables as a character vector, not as a factor
env <- c("log_root", "pH_H2O", "C_g_per_kg", "N_gkg", "TP_gkg", "Alox_mmolkg", "Feox_mmolkg",
"PH2O_mgkg", "Porg_mgkg", "DOC_mgkg", "Pinorg_mgkg", "C_per_N")
# Convert the 'depth' column to a factor
meta$depth <- as.factor(meta$depth)
# Initialize an empty list to store the results
adonis_results <- list()
# Loop over each depth and environmental variable
for (i in levels(meta$depth)) {
for (j in env) {
# Subset samples
ps_x <- subset_samples(ps_RA, sample_type != "forest")
ps_x <- subset_samples(ps_x, sampleID != "NG2B3_0to10")
ps_x <- subset_samples(ps_x, sampleID != "NG2A2_30to40")
ps_x <- subset_samples(ps_x, depth == i)
meta_subset <- meta(ps_x)
# Define prevalence of each taxa (in how many samples did each taxa appear at least once)
prev0 <- apply(X = otu_table(ps_x),
MARGIN = ifelse(taxa_are_rows(ps_x), yes = 1, no = 2),
FUN = function(x) { sum(x > 0) })
# Execute prevalence filter, using `prune_taxa()` function
ps_x <- prune_taxa((prev0 > 0), ps_x)
# Calculate Bray-Curtis distance
b <- phyloseq::distance(ps_x, method = "bray")
# Run adonis2 analysis
formula <- as.formula(paste("b ~", j))
adonis_result <- adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
# Store the result in the list with a descriptive name
result_name <- paste("depth", i, "env", j, sep = "_")
adonis_results[[result_name]] <- adonis_result
}
}
# View the list of results
adonis_results
## $depth_0...10_env_log_root
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.7966 0.13653 3.4787 0.0014 **
## Residual 22 5.0376 0.86347
## Total 23 5.8342 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_0...10_env_pH_H2O
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.6628 0.1136 2.8195 0.0054 **
## Residual 22 5.1714 0.8864
## Total 23 5.8342 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_0...10_env_C_g_per_kg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 1.5213 0.26075 7.7599 1e-04 ***
## Residual 22 4.3129 0.73925
## Total 23 5.8342 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_0...10_env_N_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 1.4712 0.25216 7.4181 1e-04 ***
## Residual 22 4.3630 0.74784
## Total 23 5.8342 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_0...10_env_TP_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.2863 0.04908 1.1354 0.2882
## Residual 22 5.5479 0.95092
## Total 23 5.8342 1.00000
##
## $depth_0...10_env_Alox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 1.0063 0.17248 4.5855 1e-04 ***
## Residual 22 4.8279 0.82752
## Total 23 5.8342 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_0...10_env_Feox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.7390 0.12666 3.1908 0.0025 **
## Residual 22 5.0952 0.87334
## Total 23 5.8342 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_0...10_env_PH2O_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.5390 0.09239 2.2396 0.0204 *
## Residual 22 5.2951 0.90761
## Total 23 5.8342 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_0...10_env_Porg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.7905 0.13549 3.4479 9e-04 ***
## Residual 22 5.0437 0.86451
## Total 23 5.8342 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_0...10_env_DOC_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 1.4018 0.24027 6.9575 1e-04 ***
## Residual 22 4.4324 0.75973
## Total 23 5.8342 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_0...10_env_Pinorg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.3372 0.05779 1.3495 0.1689
## Residual 22 5.4970 0.94221
## Total 23 5.8342 1.00000
##
## $depth_0...10_env_C_per_N
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 1.1766 0.20168 5.5579 1e-04 ***
## Residual 22 4.6575 0.79832
## Total 23 5.8342 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_10...20_env_log_root
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.7280 0.14084 3.7704 0.0013 **
## Residual 23 4.4407 0.85916
## Total 24 5.1686 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_10...20_env_pH_H2O
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.9411 0.18209 5.1204 2e-04 ***
## Residual 23 4.2275 0.81791
## Total 24 5.1686 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_10...20_env_C_g_per_kg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 1.2399 0.23989 7.2586 1e-04 ***
## Residual 23 3.9287 0.76011
## Total 24 5.1686 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_10...20_env_N_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.9946 0.19242 5.4802 1e-04 ***
## Residual 23 4.1741 0.80758
## Total 24 5.1686 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_10...20_env_TP_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.2530 0.04895 1.1838 0.243
## Residual 23 4.9156 0.95105
## Total 24 5.1686 1.00000
##
## $depth_10...20_env_Alox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 1.2957 0.25069 7.6951 1e-04 ***
## Residual 23 3.8729 0.74931
## Total 24 5.1686 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_10...20_env_Feox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 1.1620 0.22483 6.6708 1e-04 ***
## Residual 23 4.0066 0.77517
## Total 24 5.1686 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_10...20_env_PH2O_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.3596 0.06957 1.7196 0.0688 .
## Residual 23 4.8091 0.93043
## Total 24 5.1686 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_10...20_env_Porg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.2818 0.05453 1.3265 0.1728
## Residual 23 4.8868 0.94547
## Total 24 5.1686 1.00000
##
## $depth_10...20_env_DOC_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 1.1131 0.21535 6.3126 1e-04 ***
## Residual 23 4.0555 0.78465
## Total 24 5.1686 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_10...20_env_Pinorg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.3936 0.07615 1.8959 0.05 *
## Residual 23 4.7750 0.92385
## Total 24 5.1686 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_10...20_env_C_per_N
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.5862 0.11342 2.9425 0.005 **
## Residual 23 4.5824 0.88658
## Total 24 5.1686 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_20...30_env_log_root
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.7395 0.10346 2.6541 0.0029 **
## Residual 23 6.4086 0.89654
## Total 24 7.1482 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_20...30_env_pH_H2O
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.8450 0.11821 3.0833 2e-04 ***
## Residual 23 6.3032 0.88179
## Total 24 7.1482 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_20...30_env_C_g_per_kg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.7142 0.09991 2.5531 0.0049 **
## Residual 23 6.4340 0.90009
## Total 24 7.1482 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_20...30_env_N_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.6962 0.09739 2.4816 0.0067 **
## Residual 23 6.4520 0.90261
## Total 24 7.1482 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_20...30_env_TP_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.4651 0.06506 1.6005 0.0711 .
## Residual 23 6.6831 0.93494
## Total 24 7.1482 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_20...30_env_Alox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.6839 0.09567 2.4332 0.0059 **
## Residual 23 6.4643 0.90433
## Total 24 7.1482 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_20...30_env_Feox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.7618 0.10658 2.7437 0.0021 **
## Residual 23 6.3863 0.89342
## Total 24 7.1482 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_20...30_env_PH2O_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.4951 0.06927 1.7117 0.0493 *
## Residual 23 6.6530 0.93073
## Total 24 7.1482 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_20...30_env_Porg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.6443 0.09014 2.2786 0.0105 *
## Residual 23 6.5038 0.90986
## Total 24 7.1482 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_20...30_env_DOC_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.6601 0.09235 2.3401 0.0085 **
## Residual 23 6.4881 0.90765
## Total 24 7.1482 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_20...30_env_Pinorg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.3030 0.04239 1.0182 0.4027
## Residual 23 6.8451 0.95761
## Total 24 7.1482 1.00000
##
## $depth_20...30_env_C_per_N
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.4538 0.06348 1.5591 0.0878 .
## Residual 23 6.6944 0.93652
## Total 24 7.1482 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_30...40_env_log_root
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.5268 0.07033 1.6643 0.0328 *
## Residual 22 6.9641 0.92967
## Total 23 7.4909 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_30...40_env_pH_H2O
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.4728 0.06311 1.482 0.0711 .
## Residual 22 7.0181 0.93689
## Total 23 7.4909 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_30...40_env_C_g_per_kg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.2850 0.03804 0.87 0.5167
## Residual 22 7.2060 0.96196
## Total 23 7.4909 1.00000
##
## $depth_30...40_env_N_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.2756 0.03678 0.8402 0.6539
## Residual 22 7.2154 0.96322
## Total 23 7.4909 1.00000
##
## $depth_30...40_env_TP_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.4086 0.05455 1.2693 0.1997
## Residual 22 7.0823 0.94545
## Total 23 7.4909 1.00000
##
## $depth_30...40_env_Alox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.5886 0.07858 1.8761 0.0193 *
## Residual 22 6.9023 0.92142
## Total 23 7.4909 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_30...40_env_Feox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.5944 0.07936 1.8963 0.018 *
## Residual 22 6.8965 0.92064
## Total 23 7.4909 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_30...40_env_PH2O_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.3054 0.04077 0.935 0.5159
## Residual 22 7.1855 0.95923
## Total 23 7.4909 1.00000
##
## $depth_30...40_env_Porg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.5900 0.07877 1.881 0.0144 *
## Residual 22 6.9009 0.92123
## Total 23 7.4909 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_30...40_env_DOC_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.3720 0.04966 1.1495 0.3077
## Residual 22 7.1189 0.95034
## Total 23 7.4909 1.00000
##
## $depth_30...40_env_Pinorg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.2953 0.03942 0.9028 0.5857
## Residual 22 7.1956 0.96058
## Total 23 7.4909 1.00000
##
## $depth_30...40_env_C_per_N
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.4801 0.06409 1.5066 0.1147
## Residual 22 7.0108 0.93591
## Total 23 7.4909 1.00000
##
## $depth_40..._env_log_root
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.4089 0.04044 0.9693 0.4857
## Residual 23 9.7019 0.95956
## Total 24 10.1108 1.00000
##
## $depth_40..._env_pH_H2O
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.5118 0.05062 1.2263 0.1537
## Residual 23 9.5990 0.94938
## Total 24 10.1108 1.00000
##
## $depth_40..._env_C_g_per_kg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.4801 0.04748 1.1465 0.2979
## Residual 23 9.6307 0.95252
## Total 24 10.1108 1.00000
##
## $depth_40..._env_N_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.3803 0.03761 0.8989 0.6571
## Residual 23 9.7305 0.96239
## Total 24 10.1108 1.00000
##
## $depth_40..._env_TP_gkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.3102 0.03068 0.7281 0.9339
## Residual 23 9.8005 0.96932
## Total 24 10.1108 1.00000
##
## $depth_40..._env_Alox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.3621 0.03582 0.8544 0.7201
## Residual 23 9.7487 0.96418
## Total 24 10.1108 1.00000
##
## $depth_40..._env_Feox_mmolkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.8108 0.08019 2.0052 0.0017 **
## Residual 23 9.3000 0.91981
## Total 24 10.1108 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $depth_40..._env_PH2O_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.4612 0.04561 1.0992 0.2755
## Residual 23 9.6496 0.95439
## Total 24 10.1108 1.00000
##
## $depth_40..._env_Porg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.3897 0.03854 0.9221 0.5867
## Residual 23 9.7211 0.96146
## Total 24 10.1108 1.00000
##
## $depth_40..._env_DOC_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.3038 0.03005 0.7125 0.8952
## Residual 23 9.8070 0.96995
## Total 24 10.1108 1.00000
##
## $depth_40..._env_Pinorg_mgkg
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.2886 0.02854 0.6757 0.9697
## Residual 23 9.8222 0.97146
## Total 24 10.1108 1.00000
##
## $depth_40..._env_C_per_N
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = formula, data = meta_subset, permutations = 9999, method = "bray", by = NULL)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.5202 0.05145 1.2476 0.1383
## Residual 23 9.5906 0.94855
## Total 24 10.1108 1.00000
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
# Pick relative abundances (compositional) and sample metadata
ps_RA <- microbiome::transform(ps, "compositional")
ps_RA_nf <- subset_samples(ps_RA, sample_type!="forest")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_nf),
MARGIN = ifelse(taxa_are_rows(ps_RA_nf), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_RA_nf = prune_taxa((prev0 > 0), ps_RA_nf)
ps_RA_nf
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 19820 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 19820 taxa by 7 taxonomic ranks ]
meta_nf <- meta(ps_RA_nf)
I will do heatmap for fungal genera using only meadow, organic and conventional soils (without forest). I will use RA values For the final heatmap figure the RA values will be standardizes (z-transformed)
# make a dataframe of the tax-table
tax <- as.data.frame(ps_RA@tax_table)
# calculate how many genus
length(unique(tax$genus))
## [1] 943
tax$genus <- sub(".*_.*", "Unclassified_genus",tax$genus)
# calculate how many genus
length(unique(tax$genus))
## [1] 710
# of which 1 is "Unclassified_genus"
# edit the phyloseq object
ps_genus <- ps
tax <- as.matrix(tax) # convert it into a matrix.
tax <- tax_table(tax) # convert into phyloseq compatible file.
tax_table(ps_genus) <- tax # incroporate into phyloseq Object
ps_genus <- aggregate_rare(ps_genus, level = 'genus', detection = 0/100, prevalence = 0/140)
ps_genus
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 710 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 710 taxa by 2 taxonomic ranks ]
ps_genus_RA <- microbiome::transform(ps_genus, "compositional")
# remove unclassified
allTaxa = taxa_names(ps_genus_RA)
badTaxa = c("Unclassified_genus")
myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
ps_genus_RA_pruned <- prune_taxa(myTaxa, ps_genus_RA)
ps_genus_RA_pruned
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 709 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 709 taxa by 2 taxonomic ranks ]
# sample wise filtering according to most abund. genera
# Initialize an empty list to store the taxa
abund.taxa <- list()
for (i in meta_nf$sample_type) {
for (j in meta_nf$depth) {
x <- sample_names(sample_data(ps_genus_RA_pruned)[sample_data(ps_genus_RA_pruned)$sample_type == i & sample_data(ps_genus_RA_pruned)$depth == j,])
# Calculate taxa mean of the selected samples
top20 <- head(sort(rowMeans(otu_table(ps_genus_RA_pruned)[,x]), decreasing = TRUE), 20)
result_name <- paste("sample_type", i, "depth", j, sep = "_")
abund.taxa[[result_name]] <- top20
}
}
management_layer <- c(names(abund.taxa))
all_top20 <- c()
for (i in management_layer) {
top20 <- c(names(abund.taxa[[i]]))
all_top20 <- c(all_top20, top20)
}
all_top20_unique <- unique(all_top20)
length(all_top20_unique)
## [1] 95
# first aggregate to genus
ps_RA_nf_genus <- aggregate_rare(ps_RA_nf, level = "genus", detection = 0/140, prevalence = 0/100)
# filter by taxa name
ps_RA_nf_genus_pruned <- prune_taxa(all_top20_unique, ps_RA_nf_genus)
ps_RA_nf_genus_pruned
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 95 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 95 taxa by 1 taxonomic ranks ]
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
ps_genus_nf_HETAMAP <- ps_RA_nf_genus_pruned
save(ps_genus_nf_HETAMAP, file='ps_genus_nf_HETAMAP_all_top20')
OTU = as(otu_table(ps_RA_nf_genus_pruned), "matrix")
# transpose if necessary
if(taxa_are_rows(ps_RA_nf_genus_pruned)){OTU <- t(OTU)}
# Coerce to data.frame
OTU = as.data.frame(OTU)
OTU <- as.matrix(OTU)
bray_dist <- vegan::vegdist(OTU, method="bray")
str(bray_dist)
## 'dist' Named num [1:7750] 0.29 0.558 0.934 0.43 0.397 ...
## - attr(*, "maxdist")= num 1
## - attr(*, "Size")= int 125
## - attr(*, "Labels")= chr [1:125] "CG9.1_0to10" "CG9.1_10to20" "CG9.1_20to30" "CG9.1_30to40" ...
## - attr(*, "Diag")= logi FALSE
## - attr(*, "Upper")= logi FALSE
## - attr(*, "method")= chr "bray"
## - attr(*, "call")= language vegan::vegdist(x = OTU, method = "bray")
pcoa <- cmdscale(bray_dist, eig=TRUE, k = 3)
# To plot the output from the mds using ggplot a new datasheet needs to be created which contains the x,y points for each site. You can do this by calling the scores of you mds.
site.scrs <- as.data.frame(scores(pcoa, display = "sites")) #save pcoa results into dataframe
site.scrs <- cbind(site.scrs, management_type = meta_nf$sample_type) #add grouping variable "management_type" to dataframe
site.scrs <- cbind(site.scrs, depth = meta_nf$depth) #add grouping variable of depth
head(site.scrs)
## Dim1 Dim2 Dim3 management_type depth
## CG9.1_0to10 -0.27813982 -0.10346866 0.09148370 conventional 0...10
## CG9.1_10to20 -0.31446403 -0.15468057 0.01103617 conventional 10...20
## CG9.1_20to30 -0.02299791 -0.15075965 -0.18961661 conventional 20...30
## CG9.1_30to40 0.44844111 -0.09800013 -0.09893131 conventional 30...40
## CG9.1_40to70 -0.09249699 -0.11668507 -0.04215126 conventional 40...
## CG9.2_0to10 -0.19817772 -0.09107915 0.22764881 conventional 0...10
MyPalette <- c(forest = "#1167b1", meadow = "#fbc02d", organic = "#8a8a8a", conventional = "#b71c1c")
first get axis %
GP.ord <- ordinate(ps_RA_nf_genus_pruned, "PCoA", "bray")
pord = plot_ordination(ps_RA_nf_genus_pruned, GP.ord, type="samples", color="sample_type", shape="depth")
pord
Change axis percentages accordingly!
pcoa.plot12 <- ggplot() + geom_point(data=site.scrs, aes(Dim1, Dim2, colour = factor(site.scrs$management_type), shape = factor(site.scrs$depth)), size = 5, alpha = 0.7) + theme_cowplot() + theme(panel.background = element_rect(fill = NA, colour = "black", size = 1, linetype = "solid")) + labs(colour = "", shape = "") + theme(legend.text = element_text(size = 12), axis.text = element_text(size = 16)) + scale_colour_manual(values = MyPalette) + labs(y = "PC2 (9.4%)", x = "PC1 (22.7%)")
pcoa.plot12
first get axis %
GP.ord <- ordinate(ps_RA_nf_genus_pruned, "PCoA", "bray", k = 3)
pord = plot_ordination(ps_RA_nf_genus_pruned, GP.ord, type="samples", axes = c(1, 3), color="sample_type", shape="depth")
pord
Change axis percentages accordingly!
pcoa.plot13 <- ggplot() + geom_point(data=site.scrs, aes(Dim1, Dim3, colour = factor(site.scrs$management_type), shape = factor(site.scrs$depth)), size = 5, alpha = 0.7) + theme_cowplot() + theme(panel.background = element_rect(fill = NA, colour = "black", size = 1, linetype = "solid")) + labs(colour = "", shape = "") + theme(legend.text = element_text(size = 13), axis.text = element_text(size = 16)) + scale_colour_manual(values = MyPalette) + labs(y = "PC3 (8.0%)", x = "PC1 (22.7%)")
pcoa.plot13
library("ggpubr")
figure <- ggarrange(pcoa.plot12, pcoa.plot13,
labels = c("A", "B"),
ncol = 2, nrow = 1, common.legend = TRUE, legend="right", widths = c(1, 1))
figure
Based on the PCoAs, I decided to cluster samples within each management types into topsoil (0-20 cm), subsoil (20-30 cm) and deep soil (30-80 cm)
Add the above categories to our samples
meta_nf$cluster <- NA
meta_nf$cluster[meta_nf$depth_numerical<20 & meta_nf$sample_type=="meadow"]<-"topsoil_meadow"
meta_nf$cluster[meta_nf$depth_numerical<20 & meta_nf$sample_type=="organic"]<-"topsoil_organic"
meta_nf$cluster[meta_nf$depth_numerical<20 & meta_nf$sample_type=="conventional"]<-"topsoil_conventional"
meta_nf$cluster[meta_nf$depth_numerical==25 & meta_nf$sample_type=="meadow"]<-"subsoil_meadow"
meta_nf$cluster[meta_nf$depth_numerical==25 & meta_nf$sample_type=="organic"]<-"subsoil_organic"
meta_nf$cluster[meta_nf$depth_numerical==25 & meta_nf$sample_type=="conventional"]<-"subsoil_conventional"
meta_nf$cluster[meta_nf$depth_numerical>30 & meta_nf$sample_type=="meadow"]<-"deepsoil_meadow"
meta_nf$cluster[meta_nf$depth_numerical>30 & meta_nf$sample_type=="organic"]<-"deepsoil_organic"
meta_nf$cluster[meta_nf$depth_numerical>30 & meta_nf$sample_type=="conventional"]<-"deepsoil_conventional"
cluster <- unique(meta_nf$cluster)
cluster
## [1] "topsoil_conventional" "subsoil_conventional" "deepsoil_conventional"
## [4] "topsoil_meadow" "subsoil_meadow" "deepsoil_meadow"
## [7] "topsoil_organic" "subsoil_organic" "deepsoil_organic"
# add the new meta_nf to ps_RA_nf_genus_pruned
sample_data(ps_RA_nf_genus_pruned) <- sample_data(meta_nf)
lets test separately for the ones that make sense, for example, no point testing between organic topsoil and conventional deep, but rather topsoil for both soil types and then within organic between top and deep
NOTE! At the end of chunk I do p-value adjustment (“BH”) for all comparisons in the chunk
library("data.table")
library("rstatix")
subset <- subset_samples(ps_RA_nf_genus_pruned, cluster=="topsoil_meadow" | cluster=="topsoil_conventional" | cluster=="topsoil_organic")
subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 95 taxa and 50 samples ]
## sample_data() Sample Data: [ 50 samples by 30 sample variables ]
## tax_table() Taxonomy Table: [ 95 taxa by 1 taxonomic ranks ]
#create data table
melt_df <- psmelt(subset)
# Make cluster into a factor
melt_df$cluster <- factor(melt_df$cluster)
pval.list <- list()
for (i in all_top20_unique)
{
# data.frame with the selected taxonomic group
df <- filter(melt_df, genus==i)
x <- pairwise.wilcox.test(df$Abundance, df$cluster,
p.adjust.method = NULL)
x <- as.data.frame(x[["p.value"]])
x$genus <- i
x$comparison <- rownames(x)
pval.list[[i]] <- x
}
pval.list_df1 <- rbindlist(pval.list)
pval.list_df1 <- as.data.frame(pval.list_df1)
# make into long format: where the new column called "vs" contains the sample cluster comparison, and new column "p_value" contains the p_values
pval.list_df1 <- gather(pval.list_df1, vs, p_value, 1:2, factor_key=TRUE)
# adjust p values
pval.list_df1 <- adjust_pvalue(pval.list_df1, p.col = "p_value", output.col = "adj_p", method = "BH")
subset <- subset_samples(ps_RA_nf_genus_pruned, cluster=="subsoil_meadow" | cluster=="subsoil_conventional" | cluster=="subsoil_organic")
subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 95 taxa and 25 samples ]
## sample_data() Sample Data: [ 25 samples by 30 sample variables ]
## tax_table() Taxonomy Table: [ 95 taxa by 1 taxonomic ranks ]
#create data table
melt_df <- psmelt(subset)
pval.list <- list()
for (i in all_top20_unique)
{
# data.frame with the selected taxonomic group
df <- filter(melt_df, genus==i)
x <- pairwise.wilcox.test(df$Abundance, df$cluster,
p.adjust.method = "BH")
x <- as.data.frame(x[["p.value"]])
x$genus <- i
x$comparison <- rownames(x)
pval.list[[i]] <- x
}
pval.list_df7 <- rbindlist(pval.list)
pval.list_df7 <- as.data.frame(pval.list_df7)
# make into long format: where the new column called "vs" contains the sample cluster comparison, and new column "p_value" contains the p_values
pval.list_df7 <- gather(pval.list_df7, vs, p_value, 1:2, factor_key=TRUE)
# adjust p values
pval.list_df7 <- adjust_pvalue(pval.list_df7, p.col = "p_value", output.col = "adj_p", method = "BH")
subset <- subset_samples(ps_RA_nf_genus_pruned, cluster=="deepsoil_meadow" | cluster=="deepsoil_conventional" | cluster=="deepsoil_organic")
subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 95 taxa and 50 samples ]
## sample_data() Sample Data: [ 50 samples by 30 sample variables ]
## tax_table() Taxonomy Table: [ 95 taxa by 1 taxonomic ranks ]
#create data table
melt_df <- psmelt(subset)
pval.list <- list()
for (i in all_top20_unique)
{
# data.frame with the selected taxonomic group
df <- filter(melt_df, genus==i)
x <- pairwise.wilcox.test(df$Abundance, df$cluster,
p.adjust.method = "BH")
x <- as.data.frame(x[["p.value"]])
x$genus <- i
x$comparison <- rownames(x)
pval.list[[i]] <- x
}
pval.list_df2 <- rbindlist(pval.list)
pval.list_df2 <- as.data.frame(pval.list_df2)
# make into long format: where the new column called "vs" contains the sample cluster comparison, and new column "p_value" contains the p_values
pval.list_df2 <- gather(pval.list_df2, vs, p_value, 1:2, factor_key=TRUE)
# adjust p values
pval.list_df2 <- adjust_pvalue(pval.list_df2, p.col = "p_value", output.col = "adj_p", method = "BH")
subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type=="organic")
subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 95 taxa and 40 samples ]
## sample_data() Sample Data: [ 40 samples by 30 sample variables ]
## tax_table() Taxonomy Table: [ 95 taxa by 1 taxonomic ranks ]
#create data table
melt_df <- psmelt(subset)
pval.list <- list()
for (i in all_top20_unique)
{
# data.frame with the selected taxonomic group
df <- filter(melt_df, genus==i)
x <- pairwise.wilcox.test(df$Abundance, df$cluster,
p.adjust.method = "BH")
x <- as.data.frame(x[["p.value"]])
x$genus <- i
x$comparison <- rownames(x)
pval.list[[i]] <- x
}
pval.list_df3 <- rbindlist(pval.list)
pval.list_df3 <- as.data.frame(pval.list_df3)
# make into long format: where the new column called "vs" contains the sample cluster comparison, and new column "p_value" contains the p_values
pval.list_df3 <- gather(pval.list_df3, vs, p_value, 1:2, factor_key=TRUE)
# adjust p values
pval.list_df3 <- adjust_pvalue(pval.list_df3, p.col = "p_value", output.col = "adj_p", method = "BH")
subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type=="conventional")
subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 95 taxa and 45 samples ]
## sample_data() Sample Data: [ 45 samples by 30 sample variables ]
## tax_table() Taxonomy Table: [ 95 taxa by 1 taxonomic ranks ]
#create data table
melt_df <- psmelt(subset)
pval.list <- list()
for (i in all_top20_unique)
{
# data.frame with the selected taxonomic group
df <- filter(melt_df, genus==i)
x <- pairwise.wilcox.test(df$Abundance, df$cluster,
p.adjust.method = "BH")
x <- as.data.frame(x[["p.value"]])
x$genus <- i
x$comparison <- rownames(x)
pval.list[[i]] <- x
}
pval.list_df4 <- rbindlist(pval.list)
pval.list_df4 <- as.data.frame(pval.list_df4)
# make into long format: where the new column called "vs" contains the sample cluster comparison, and new column "p_value" contains the p_values
pval.list_df4 <- gather(pval.list_df4, vs, p_value, 1:2, factor_key=TRUE)
# adjust p values
pval.list_df4 <- adjust_pvalue(pval.list_df4, p.col = "p_value", output.col = "adj_p", method = "BH")
subset <- subset_samples(ps_RA_nf_genus_pruned, sample_type=="meadow")
subset
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 95 taxa and 40 samples ]
## sample_data() Sample Data: [ 40 samples by 30 sample variables ]
## tax_table() Taxonomy Table: [ 95 taxa by 1 taxonomic ranks ]
#create data table
melt_df <- psmelt(subset)
pval.list <- list()
for (i in all_top20_unique)
{
# data.frame with the selected taxonomic group
df <- filter(melt_df, genus==i)
x <- pairwise.wilcox.test(df$Abundance, df$cluster,
p.adjust.method = "BH")
x <- as.data.frame(x[["p.value"]])
x$genus <- i
x$comparison <- rownames(x)
pval.list[[i]] <- x
}
pval.list_df5 <- rbindlist(pval.list)
pval.list_df5 <- as.data.frame(pval.list_df5)
# make into long format: where the new column called "vs" contains the sample cluster comparison, and new column "p_value" contains the p_values
pval.list_df5 <- gather(pval.list_df5, vs, p_value, 1:2, factor_key=TRUE)
# adjust p values
pval.list_df5 <- adjust_pvalue(pval.list_df5, p.col = "p_value", output.col = "adj_p", method = "BH")
all.pvals <- rbind(pval.list_df1, pval.list_df2, pval.list_df3, pval.list_df4, pval.list_df5, pval.list_df7)
all.sig.pvals <- all.pvals[all.pvals$adj_p <= 0.05, ]
write.csv2(all.sig.pvals, file = "\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS\\HEATMAP_sig_wilcox_p_values.csv", row.names = FALSE)
all.sig.genus <- unique(all.sig.pvals$genus)
length(all.sig.genus)
## [1] 78
# only keep sig
# filter by taxa name
ps_RA_nf_genus_pruned_sig <- prune_taxa(all.sig.genus, ps_RA_nf_genus_pruned)
ps_Heatmap <- ps_RA_nf_genus_pruned_sig
ps_Heatmap
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 77 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 30 sample variables ]
## tax_table() Taxonomy Table: [ 77 taxa by 1 taxonomic ranks ]
rm(ps_RA_nf_genus_pruned_sig)
df <- psmelt(ps_Heatmap)
x <- df %>%
group_by(OTU, sample_type, depth) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
x
## # A tibble: 1,155 × 5
## # Groups: OTU, sample_type [231]
## OTU sample_type depth mean se
## <chr> <fct> <chr> <dbl> <dbl>
## 1 Acephala meadow 0...10 0 0
## 2 Acephala meadow 10...20 0 0
## 3 Acephala meadow 20...30 0.0000157 0.00000820
## 4 Acephala meadow 30...40 0.00601 0.00601
## 5 Acephala meadow 40... 0.0343 0.0343
## 6 Acephala organic 0...10 0.00000500 0.00000327
## 7 Acephala organic 10...20 0.00000534 0.00000270
## 8 Acephala organic 20...30 0.00000762 0.00000571
## 9 Acephala organic 30...40 0 0
## 10 Acephala organic 40... 0 0
## # ℹ 1,145 more rows
write.csv2(x, file = "\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS\\HEATMAP_sig_GENERA_mean_and_se.csv", row.names = FALSE)
I need to make a separate funguild phyloseq for heatmap, where I have only the genus and higher level annotation (no species level)
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\YONI_ITS_analyses\\R_ITS_yoni\\Analyses_final\\RE_ANNOTATION_2024')
FG <- read.csv2("FUNGuild_31_05_2024.csv")
# what different levels there are:
unique(FG$taxonomicLevel)
## [1] "Genus" "Species" "Variety" "Family" "Order"
## [6] "Phylum" "Form" "Subspecies"
I will get the annotations from genus and higher tax levels:
Genus
fg <- FG[FG$taxonomicLevel == "Genus", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)
# merge tables
colnames(fg)[1] <- "genus"
FG_tax_table <- merge(tax_table, fg, by = "genus", all.x = TRUE)
# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA
# only keep the OTU, TrophicMode and guild
# and OTU as row names
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]
# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")
# change column names
colnames(FG_tax_table)[1] <- "trophicMode_gen"
colnames(FG_tax_table)[2] <- "guild_gen"
# save with new name
FUNGuild_gen <- FG_tax_table
Family
fg <- FG[FG$taxonomicLevel == "Family", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)
# merge tables
colnames(fg)[1] <- "family"
FG_tax_table <- merge(tax_table, fg, by = "family", all.x = TRUE)
# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA
# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]
# and taxon as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")
# change column names
colnames(FG_tax_table)[1] <- "trophicMode_fam"
colnames(FG_tax_table)[2] <- "guild_fam"
# save with new name
FUNGuild_fam <- FG_tax_table
Order
fg <- FG[FG$taxonomicLevel == "Order", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)
# merge tables
colnames(fg)[1] <- "order"
FG_tax_table <- merge(tax_table, fg, by = "order", all.x = TRUE)
# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA
# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]
# and taxon as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")
# change column names
colnames(FG_tax_table)[1] <- "trophicMode_ord"
colnames(FG_tax_table)[2] <- "guild_ord"
# save with new name
FUNGuild_ord <- FG_tax_table
Phylum
fg <- FG[FG$taxonomicLevel == "Phylum", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)
# merge tables
colnames(fg)[1] <- "phylum"
FG_tax_table <- merge(tax_table, fg, by = "phylum", all.x = TRUE)
# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA
# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]
# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")
# change column names
colnames(FG_tax_table)[1] <- "trophicMode_phy"
colnames(FG_tax_table)[2] <- "guild_phy"
# save with new name
FUNGuild_phy <- FG_tax_table
Combine all annotations:Genus Family Order Phylum
x <- left_join(rownames_to_column(FUNGuild_gen), rownames_to_column(FUNGuild_fam), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_ord), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_phy), by = "rowname")
# get the value from another column if NA
y <- x %>%
mutate(trophicMode_gen = coalesce(trophicMode_gen,trophicMode_fam))
# get the value from another column if NA
y <- y %>%
mutate(guild_gen = coalesce(guild_gen,guild_fam))
# same for order
# get the value from another column if NA
y <- y %>%
mutate(trophicMode_gen = coalesce(trophicMode_gen,trophicMode_ord))
# get the value from another column if NA
y <- y %>%
mutate(guild_gen = coalesce(guild_gen,guild_ord))
# same for phylum
# get the value from another column if NA
y <- y %>%
mutate(trophicMode_gen = coalesce(trophicMode_gen,trophicMode_phy))
# get the value from another column if NA
y <- y %>%
mutate(guild_gen = coalesce(guild_gen,guild_phy))
# then rename the gen columns
colnames(y)[2] <- "trophicMode"
colnames(y)[3] <- "guild"
# remove the rest of the columns
y <- y[, -c(4:11)]
# rownames
y2 <- y[,-1]
rownames(y2) <- y[,1]
# Remove empty spaces and remove also "|"
y2$trophicMode <- gsub(" ", "", y2$trophicMode, fixed = TRUE)
y2$guild <- gsub("|", "", y2$guild, fixed = TRUE)
y3 <- left_join(rownames_to_column(y2), rownames_to_column(tax_table), by = "rowname")
row.names(y3) <- y3$rowname
y3 <- y3[, -1]
Define AMFs, Ectomycorrhizal and Plant pathogens
Here in FUNGuild column:
z <- y3
z <- z %>%
mutate(FG = case_when(grepl("Ectomycorrhizal", guild) & trophicMode=="Symbiotroph" ~ "Ectomycorrhizal", grepl("Arbuscular", guild) ~ "Arbuscular Mycorrhizal", guild == "Endophyte" & trophicMode=="Symbiotroph" ~ "Endophyte", guild=="Plant Pathogen" & trophicMode=="Pathotroph"~ "Plant Pathogen"))
z <- z %>%
mutate(FUNGuild = case_when(FG == "Ectomycorrhizal" | FG == "Arbuscular Mycorrhizal" | FG == "Endophyte" | FG == "Plant Pathogen" ~ FG, guild != "Ectomycorrhizal" | FG != "Arbuscular Mycorrhizal" | FG != "Endophyte" | FG != "Plant Pathogen" ~ z$trophicMode))
FG_tax_table <- z
# remove the FG column
FG_tax_table <- FG_tax_table[, -c(11)]
Check the different written forms, if I have empty spaces?
unique(FG_tax_table$trophicMode)
## [1] "Saprotroph" NA
## [3] "Pathotroph-Saprotroph" "Saprotroph-Symbiotroph"
## [5] "Pathotroph" "Symbiotroph"
## [7] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"
## [9] "Saprotroph-Saprotroph-Symbiotroph" "Pathotroph-Pathotroph-Saprotroph"
#unique(FG_tax_table$guild)
unique(FG_tax_table$FUNGuild)
## [1] "Saprotroph" NA
## [3] "Pathotroph-Saprotroph" "Saprotroph-Symbiotroph"
## [5] "Plant Pathogen" "Ectomycorrhizal"
## [7] "Arbuscular Mycorrhizal" "Pathotroph"
## [9] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"
## [11] "Symbiotroph" "Saprotroph-Saprotroph-Symbiotroph"
## [13] "Endophyte" "Pathotroph-Pathotroph-Saprotroph"
# change some names for FUNGuild
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Symbiotroph"] <- "Other Symbiotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Pathotroph"] <- "Other Pathotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Pathotroph-Pathotroph-Saprotroph"] <- "Pathotroph-Saprotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Saprotroph-Saprotroph-Symbiotroph"] <- "Saprotroph-Symbiotroph"
# and for trophicMode
FG_tax_table$trophicMode[FG_tax_table$trophicMode=="Pathotroph-Pathotroph-Saprotroph"] <- "Pathotroph-Saprotroph"
FG_tax_table$trophicMode[FG_tax_table$trophicMode=="Saprotroph-Saprotroph-Symbiotroph"] <- "Saprotroph-Symbiotroph"
# reorder
FG_tax_table <- FG_tax_table[, c(1:2, 11, 3:10)]
Check again
unique(FG_tax_table$trophicMode)
## [1] "Saprotroph" NA
## [3] "Pathotroph-Saprotroph" "Saprotroph-Symbiotroph"
## [5] "Pathotroph" "Symbiotroph"
## [7] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"
#unique(FG_tax_table$guild)
unique(FG_tax_table$FUNGuild)
## [1] "Saprotroph" NA
## [3] "Pathotroph-Saprotroph" "Saprotroph-Symbiotroph"
## [5] "Plant Pathogen" "Ectomycorrhizal"
## [7] "Arbuscular Mycorrhizal" "Other Pathotroph"
## [9] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"
## [11] "Other Symbiotroph" "Endophyte"
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
load('ps_FINAL')
ps_FG_HEATMAP <- phyloseq(otu_table(ps), tax_table(as.matrix(FG_tax_table)), sample_data(ps))
ps_FG_HEATMAP
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 11 taxonomic ranks ]
save(ps_FG_HEATMAP, file = 'ps_FG_for_HEATMAP')
FG_tax <- as.data.frame(as.matrix(tax_table(ps_FG_HEATMAP)))
ps_FG_HEATMAP_nf <- subset_samples(ps_FG_HEATMAP, sample_type!="forest")
ps_FG_HEATMAP_nf
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 11 taxonomic ranks ]
meta_nf <- meta(ps_FG_HEATMAP_nf)
meta_nf$new_depth <- meta_nf$depth
meta_nf$new_depth <- gsub("...", "-", meta_nf$new_depth, fixed = TRUE)
meta_nf$new_depth[meta_nf$new_depth=="40-"] <- "40-80"
meta_nf$new_depth[meta_nf$new_depth=="0-10"] <- "0-10 cm"
meta_nf$new_depth[meta_nf$new_depth=="10-20"] <- "10-20 cm"
meta_nf$new_depth[meta_nf$new_depth=="20-30"] <- "20-30 cm"
meta_nf$new_depth[meta_nf$new_depth=="30-40"] <- "30-40 cm"
meta_nf$new_depth[meta_nf$new_depth=="40-80"] <- "40-80 cm"
ps_genus <- aggregate_rare(ps, level = 'genus', detection = 0/100, prevalence = 0/100)
ps_genus
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 943 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 943 taxa by 1 taxonomic ranks ]
# subset
ps_genus_NO_FOREST <- subset_samples(ps_genus, sample_type!="forest")
ps_genus_NO_FOREST
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 943 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 943 taxa by 1 taxonomic ranks ]
meta_nf <- meta(ps_genus_NO_FOREST)
# RA-transform
ps_genus_NO_FOREST_RA <- microbiome::transform(ps_genus_NO_FOREST, 'compositional')
# Z-transform
ps_genus_NO_FOREST_RA_z <- microbiome::transform(ps_genus_NO_FOREST_RA, 'Z', log10 = FALSE)
# subset to only the 77 sig genus from above
hetamap.taxa <- taxa_names(ps_Heatmap)
data <- as(otu_table(ps_genus_NO_FOREST_RA_z), "matrix")
data <- as.data.frame(data)
data_subset <- data[(row.names(data) %in% hetamap.taxa),]
# now only 77 genuses, as should be
data_subset <- as.matrix(data_subset)
# add annotations "depth" and "soil management"
my_sample_col <- data.frame(meta_nf[c("depth", "sample_type")], row.names = row.names(meta_nf))
colnames(my_sample_col) <- c("depth", "soil management")
x <- as.data.frame(t(data_subset))
y <- as.data.frame(my_sample_col)
colnames(y) <- c("depth", "sample_type")
z <- dplyr::left_join(rownames_to_column(x), rownames_to_column(y), by=c("rowname" = "rowname"))
colnames(z)[1] <- "ID"
f <- z %>%
group_by(sample_type, depth) %>%
summarise_all(mean)
# remove ID column
f <- f[ , -3]
# make new ID column, which is the soiltypedepth
library(stringr)
f$ID <- str_c(f$sample_type, '_', f$depth)
# make new df with just sample type, depth and sampletypedepth
df <- f[c("sample_type", "depth", "ID")]
# remove extra columns from f
f2 <- f[, -c(1, 2, 80)]
# column into rownames
rownames(f2) <- f$ID
# make into numeric matrix
f3 <- data.matrix(f2, rownames.force = NA)
f4 <- t(f3)
# same for df
# remove extra columns from df
df2 <- df[ , -c(3)]
# column into rownames
rownames(df2) <- df$ID
my_sample_col2 <- df2
my_sample_col3 <- as.data.frame(my_sample_col2)
rownames(my_sample_col3) <- df$ID
# lets add annotations of samples
# add annotations "depth" and "soil_type" and change order
colnames(my_sample_col3) <- c("soil management", "depth")
my_sample_col3 <- my_sample_col3[, c(2,1)]
library("pheatmap")
library("ggplotify")
# only keep the genus and FUNGuild
x <- FG_tax[, c(9, 3)]
# Remove duplicates by single column
FUNGuild_tax_table <- x[!duplicated(x$genus), ]
dim(FUNGuild_tax_table)
## [1] 943 2
# and genus as row names
rownames(FUNGuild_tax_table) <- NULL
FUNGuild_tax_table <- column_to_rownames(FUNGuild_tax_table, var = "genus")
# view data frame
unique(FUNGuild_tax_table$FUNGuild)
## [1] NA "Saprotroph-Symbiotroph"
## [3] "Arbuscular Mycorrhizal" "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Saprotroph" "Saprotroph"
## [7] "Endophyte" "Ectomycorrhizal"
## [9] "Other Pathotroph" "Plant Pathogen"
## [11] "Other Symbiotroph" "Pathotroph-Symbiotroph"
FUNGuild_tax_table$FUNGuild <- as.factor(FUNGuild_tax_table$FUNGuild)
# change level order
FUNGuild_tax_table$FUNGuild <- factor(FUNGuild_tax_table$FUNGuild, levels = c("Plant Pathogen", "Other Pathotroph", "Pathotroph-Saprotroph", "Pathotroph-Saprotroph-Symbiotroph", "Pathotroph-Symbiotroph", "Saprotroph", "Saprotroph-Symbiotroph", "Other Symbiotroph", "Ectomycorrhizal", "Endophyte", "Arbuscular Mycorrhizal"))
levels(FUNGuild_tax_table$FUNGuild)
## [1] "Plant Pathogen" "Other Pathotroph"
## [3] "Pathotroph-Saprotroph" "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Symbiotroph" "Saprotroph"
## [7] "Saprotroph-Symbiotroph" "Other Symbiotroph"
## [9] "Ectomycorrhizal" "Endophyte"
## [11] "Arbuscular Mycorrhizal"
#Create color palette
my_colour = list(
"soil management" = c(meadow = "#fbc02d", organic = "#8a8a8a", conventional = "#b71c1c"),
depth = c('0...10' = "#387212", '10...20' = "#ADC476", '20...30' = "#D8D2BA",'30...40' = "#907852", '40...' = "#6A4C3A"),
FUNGuild = c("Plant Pathogen" = "deeppink", "Other Pathotroph" = "#d6849a", "Pathotroph-Saprotroph" = "#e3adbc", "Pathotroph-Saprotroph-Symbiotroph" = "#f1d6dd", "Pathotroph-Symbiotroph" = "#faf1f4", "Saprotroph" = "#CBBEAD", "Saprotroph-Symbiotroph" = "darkseagreen", "Other Symbiotroph" = "lightgreen", "Ectomycorrhizal" = "darkgreen", "Endophyte" = "#A2CF31", "Arbuscular Mycorrhizal" = "darkolivegreen1"))
p2 <- as.ggplot(function() pheatmap(f4, cluster_cols = FALSE, cluster_rows = TRUE, annotation_col = my_sample_col3, annotation_colors = my_colour, color=colorRampPalette(c("navy", "white", "red"))(50), show_colnames = FALSE, legend = TRUE, annotation_row = FUNGuild_tax_table, border_color = NA, cellheight = 16, fontsize = 14, fontsize_row = 14, annotation_names_row = FALSE, annotation_names_col = FALSE))
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
# Pick relative abundances (compositional) and sample metadata
ps_RA <- microbiome::transform(ps, "compositional")
ps_RA_nf <- subset_samples(ps_RA, sample_type=="forest")
# You have only submitted samples, not OTUs. Define prevalence of each taxa
# (in how many samples did each taxa appear at least once)
prev0 = apply(X = otu_table(ps_RA_nf),
MARGIN = ifelse(taxa_are_rows(ps_RA_nf), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_RA_nf = prune_taxa((prev0 > 0), ps_RA_nf)
ps_RA_nf
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 5398 taxa and 15 samples ]
## sample_data() Sample Data: [ 15 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 5398 taxa by 7 taxonomic ranks ]
meta_nf <- meta(ps_RA_nf)
I will use RA values For the final heatmap figure the RA values will be standardizes (z-transformed)
# make a dataframe of the tax-table
tax <- as.data.frame(ps_RA@tax_table)
# calculate how many genus
length(unique(tax$genus))
## [1] 943
tax$genus <- sub(".*_.*", "Unclassified_genus",tax$genus)
# calculate how many genus
length(unique(tax$genus))
## [1] 710
# of which 1 is "Unclassified_genus"
# edit the phyloseq object
ps_genus <- ps
tax <- as.matrix(tax) # convert it into a matrix.
tax <- tax_table(tax) # convert into phyloseq compatible file.
tax_table(ps_genus) <- tax # incroporate into phyloseq Object
ps_genus <- aggregate_rare(ps_genus, level = 'genus', detection = 0/100, prevalence = 0/140)
ps_genus
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 710 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 710 taxa by 2 taxonomic ranks ]
ps_genus_RA <- microbiome::transform(ps_genus, "compositional")
# remove unclassified
allTaxa = taxa_names(ps_genus_RA)
badTaxa = c("Unclassified_genus")
myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
ps_genus_RA_pruned <- prune_taxa(myTaxa, ps_genus_RA)
ps_genus_RA_pruned
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 709 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 709 taxa by 2 taxonomic ranks ]
# sample wise filtering according to most abund. genera
# Initialize an empty list to store the taxa
abund.taxa <- list()
for (i in meta_nf$sample_type) {
for (j in meta_nf$depth) {
x <- sample_names(sample_data(ps_genus_RA_pruned)[sample_data(ps_genus_RA_pruned)$sample_type == i & sample_data(ps_genus_RA_pruned)$depth == j,])
# Calculate taxa mean of the selected samples
top10 <- head(sort(rowMeans(otu_table(ps_genus_RA_pruned)[,x]), decreasing = TRUE), 10)
result_name <- paste("sample_type", i, "depth", j, sep = "_")
abund.taxa[[result_name]] <- top10
}
}
management_layer <- c(names(abund.taxa))
all_top10 <- c()
for (i in management_layer) {
top10 <- c(names(abund.taxa[[i]]))
all_top10 <- c(all_top10, top10)
}
all_top10_unique <- unique(all_top10)
length(all_top10_unique)
## [1] 30
# first aggregate to genus
ps_RA_nf_genus <- aggregate_rare(ps_RA_nf, level = "genus", detection = 0/140, prevalence = 0/100)
# filter by taxa name
ps_RA_nf_genus_pruned <- prune_taxa(all_top10_unique, ps_RA_nf_genus)
ps_RA_nf_genus_pruned
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 30 taxa and 15 samples ]
## sample_data() Sample Data: [ 15 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 30 taxa by 1 taxonomic ranks ]
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
ps_genus_FOREST_ONLY_HETAMAP <- ps_RA_nf_genus_pruned
save(ps_genus_FOREST_ONLY_HETAMAP, file='ps_genus_nf_HETAMAP_all_top10_FOREST_ONLY')
OTU = as(otu_table(ps_RA_nf_genus_pruned), "matrix")
# transpose if necessary
if(taxa_are_rows(ps_RA_nf_genus_pruned)){OTU <- t(OTU)}
# Coerce to data.frame
OTU = as.data.frame(OTU)
OTU <- as.matrix(OTU)
bray_dist <- vegan::vegdist(OTU, method="bray")
str(bray_dist)
## 'dist' Named num [1:105] 0.804 0.919 0.941 0.999 0.736 ...
## - attr(*, "maxdist")= num 1
## - attr(*, "Size")= int 15
## - attr(*, "Labels")= chr [1:15] "M1_0to10" "M1_10to20" "M1_20to30" "M1_30to40" ...
## - attr(*, "Diag")= logi FALSE
## - attr(*, "Upper")= logi FALSE
## - attr(*, "method")= chr "bray"
## - attr(*, "call")= language vegan::vegdist(x = OTU, method = "bray")
pcoa <- cmdscale(bray_dist, eig=TRUE, k = 3)
# To plot the output from the mds using ggplot a new datasheet needs to be created which contains the x,y points for each site. You can do this by calling the scores of you mds.
site.scrs <- as.data.frame(scores(pcoa, display = "sites")) #save pcoa results into dataframe
site.scrs <- cbind(site.scrs, management_type = meta_nf$sample_type) #add grouping variable "management_type" to dataframe
site.scrs <- cbind(site.scrs, depth = meta_nf$depth) #add grouping variable of depth
head(site.scrs)
## Dim1 Dim2 Dim3 management_type depth
## M1_0to10 0.3123094 -0.07176912 0.25664796 forest 0...10
## M1_10to20 0.3524951 -0.09107174 -0.17221039 forest 10...20
## M1_20to30 0.2488208 0.08019621 -0.32338694 forest 20...30
## M1_30to40 -0.3062000 0.05421002 -0.10232306 forest 30...40
## M1_40to60 -0.3767020 -0.33420005 0.01743208 forest 40...
## M2_0to10 0.4386126 -0.08676279 0.11762377 forest 0...10
MyPalette <- c(forest = "#1167b1", meadow = "#fbc02d", organic = "#8a8a8a", conventional = "#b71c1c")
first get axis %
GP.ord <- ordinate(ps_RA_nf_genus_pruned, "PCoA", "bray")
pord = plot_ordination(ps_RA_nf_genus_pruned, GP.ord, type="samples", color="sample_type", shape="depth")
pord
Change axis percentages accordingly!
pcoa.plot12 <- ggplot() + geom_point(data=site.scrs, aes(Dim1, Dim2, colour = factor(site.scrs$management_type), shape = factor(site.scrs$depth)), size = 5, alpha = 0.7) + theme_cowplot() + theme(panel.background = element_rect(fill = NA, colour = "black", size = 1, linetype = "solid")) + labs(colour = "", shape = "") + theme(legend.text = element_text(size = 12), axis.text = element_text(size = 16)) + scale_colour_manual(values = MyPalette) + labs(y = "PC2 (18.8%)", x = "PC1 (28.5%)")
pcoa.plot12
first get axis %
GP.ord <- ordinate(ps_RA_nf_genus_pruned, "PCoA", "bray", k = 3)
pord = plot_ordination(ps_RA_nf_genus_pruned, GP.ord, type="samples", axes = c(1, 3), color="sample_type", shape="depth")
pord
Change axis percentages accordingly!
pcoa.plot13 <- ggplot() + geom_point(data=site.scrs, aes(Dim1, Dim3, colour = factor(site.scrs$management_type), shape = factor(site.scrs$depth)), size = 5, alpha = 0.7) + theme_cowplot() + theme(panel.background = element_rect(fill = NA, colour = "black", size = 1, linetype = "solid")) + labs(colour = "", shape = "") + theme(legend.text = element_text(size = 13), axis.text = element_text(size = 16)) + scale_colour_manual(values = MyPalette) + labs(y = "PC3 (11.0%)", x = "PC1 (28.5%)")
pcoa.plot13
library("ggpubr")
figure <- ggarrange(pcoa.plot12, pcoa.plot13,
labels = c("A", "B"),
ncol = 2, nrow = 1, common.legend = TRUE, legend="right", widths = c(1, 1))
figure
ps_Heatmap <- ps_genus_FOREST_ONLY_HETAMAP
ps_Heatmap
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 30 taxa and 15 samples ]
## sample_data() Sample Data: [ 15 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 30 taxa by 1 taxonomic ranks ]
df <- psmelt(ps_Heatmap)
x <- df %>%
group_by(OTU, sample_type, depth) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
x
## # A tibble: 150 × 5
## # Groups: OTU, sample_type [30]
## OTU sample_type depth mean se
## <chr> <fct> <chr> <dbl> <dbl>
## 1 Botrytis forest 0...10 0.0000325 0.0000325
## 2 Botrytis forest 10...20 0.0000800 0.0000800
## 3 Botrytis forest 20...30 0 0
## 4 Botrytis forest 30...40 0 0
## 5 Botrytis forest 40... 0.163 0.163
## 6 Chalara forest 0...10 0.0177 0.0156
## 7 Chalara forest 10...20 0.00361 0.00175
## 8 Chalara forest 20...30 0.000330 0.000167
## 9 Chalara forest 30...40 0.0000708 0.0000708
## 10 Chalara forest 40... 0 0
## # ℹ 140 more rows
write.csv2(x, file = "\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS\\HEATMAP_sig_GENERA_mean_and_se_FOREST_ONLY_all_10_most_abund.csv", row.names = FALSE)
I need to make a separate funguild phyloseq for heatmap, where I have only the genus and higher level annotation (no species level)
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\YONI_ITS_analyses\\R_ITS_yoni\\Analyses_final\\RE_ANNOTATION_2024')
FG <- read.csv2("FUNGuild_31_05_2024.csv")
# what different levels there are:
unique(FG$taxonomicLevel)
## [1] "Genus" "Species" "Variety" "Family" "Order"
## [6] "Phylum" "Form" "Subspecies"
I will get the annotations from genus and higher tax levels:
Genus
fg <- FG[FG$taxonomicLevel == "Genus", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)
# merge tables
colnames(fg)[1] <- "genus"
FG_tax_table <- merge(tax_table, fg, by = "genus", all.x = TRUE)
# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA
# only keep the OTU, TrophicMode and guild
# and OTU as row names
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]
# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")
# change column names
colnames(FG_tax_table)[1] <- "trophicMode_gen"
colnames(FG_tax_table)[2] <- "guild_gen"
# save with new name
FUNGuild_gen <- FG_tax_table
Family
fg <- FG[FG$taxonomicLevel == "Family", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)
# merge tables
colnames(fg)[1] <- "family"
FG_tax_table <- merge(tax_table, fg, by = "family", all.x = TRUE)
# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA
# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]
# and taxon as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")
# change column names
colnames(FG_tax_table)[1] <- "trophicMode_fam"
colnames(FG_tax_table)[2] <- "guild_fam"
# save with new name
FUNGuild_fam <- FG_tax_table
Order
fg <- FG[FG$taxonomicLevel == "Order", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)
# merge tables
colnames(fg)[1] <- "order"
FG_tax_table <- merge(tax_table, fg, by = "order", all.x = TRUE)
# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA
# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]
# and taxon as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")
# change column names
colnames(FG_tax_table)[1] <- "trophicMode_ord"
colnames(FG_tax_table)[2] <- "guild_ord"
# save with new name
FUNGuild_ord <- FG_tax_table
Phylum
fg <- FG[FG$taxonomicLevel == "Phylum", ]
tax_table <- as.data.frame(tax_table(ps))
tax_table$OTU <- rownames(tax_table)
# merge tables
colnames(fg)[1] <- "phylum"
FG_tax_table <- merge(tax_table, fg, by = "phylum", all.x = TRUE)
# modify "possible"s to NA's at guild and trophic mode -level
FG_tax_table$guild[FG_tax_table$confidenceRanking == "Possible"] <- NA
FG_tax_table$trophicMode[FG_tax_table$confidenceRanking == "Possible"] <- NA
# only keep the OTU, TrophicMode and guild
FG_tax_table <- FG_tax_table[c("OTU", "trophicMode", "guild")]
# and OTU as row names
FG_tax_table <- column_to_rownames(FG_tax_table, var = "OTU")
# change column names
colnames(FG_tax_table)[1] <- "trophicMode_phy"
colnames(FG_tax_table)[2] <- "guild_phy"
# save with new name
FUNGuild_phy <- FG_tax_table
Combine all annotations:Genus Family Order Phylum
x <- left_join(rownames_to_column(FUNGuild_gen), rownames_to_column(FUNGuild_fam), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_ord), by = "rowname")
x <- left_join(x, rownames_to_column(FUNGuild_phy), by = "rowname")
# get the value from another column if NA
y <- x %>%
mutate(trophicMode_gen = coalesce(trophicMode_gen,trophicMode_fam))
# get the value from another column if NA
y <- y %>%
mutate(guild_gen = coalesce(guild_gen,guild_fam))
# same for order
# get the value from another column if NA
y <- y %>%
mutate(trophicMode_gen = coalesce(trophicMode_gen,trophicMode_ord))
# get the value from another column if NA
y <- y %>%
mutate(guild_gen = coalesce(guild_gen,guild_ord))
# same for phylum
# get the value from another column if NA
y <- y %>%
mutate(trophicMode_gen = coalesce(trophicMode_gen,trophicMode_phy))
# get the value from another column if NA
y <- y %>%
mutate(guild_gen = coalesce(guild_gen,guild_phy))
# then rename the gen columns
colnames(y)[2] <- "trophicMode"
colnames(y)[3] <- "guild"
# remove the rest of the columns
y <- y[, -c(4:11)]
# rownames
y2 <- y[,-1]
rownames(y2) <- y[,1]
# Remove empty spaces and remove also "|"
y2$trophicMode <- gsub(" ", "", y2$trophicMode, fixed = TRUE)
y2$guild <- gsub("|", "", y2$guild, fixed = TRUE)
y3 <- left_join(rownames_to_column(y2), rownames_to_column(tax_table), by = "rowname")
row.names(y3) <- y3$rowname
y3 <- y3[, -1]
Define AMFs, Ectomycorrhizal and Plant pathogens
Here in FUNGuild column:
z <- y3
z <- z %>%
mutate(FG = case_when(grepl("Ectomycorrhizal", guild) & trophicMode=="Symbiotroph" ~ "Ectomycorrhizal", grepl("Arbuscular", guild) ~ "Arbuscular Mycorrhizal", guild == "Endophyte" & trophicMode=="Symbiotroph" ~ "Endophyte", guild=="Plant Pathogen" & trophicMode=="Pathotroph"~ "Plant Pathogen"))
z <- z %>%
mutate(FUNGuild = case_when(FG == "Ectomycorrhizal" | FG == "Arbuscular Mycorrhizal" | FG == "Endophyte" | FG == "Plant Pathogen" ~ FG, guild != "Ectomycorrhizal" | FG != "Arbuscular Mycorrhizal" | FG != "Endophyte" | FG != "Plant Pathogen" ~ z$trophicMode))
FG_tax_table <- z
# remove the FG column
FG_tax_table <- FG_tax_table[, -c(11)]
Check the different written forms, if I have empty spaces?
unique(FG_tax_table$trophicMode)
## [1] "Saprotroph" NA
## [3] "Pathotroph-Saprotroph" "Saprotroph-Symbiotroph"
## [5] "Pathotroph" "Symbiotroph"
## [7] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"
## [9] "Saprotroph-Saprotroph-Symbiotroph" "Pathotroph-Pathotroph-Saprotroph"
#unique(FG_tax_table$guild)
unique(FG_tax_table$FUNGuild)
## [1] "Saprotroph" NA
## [3] "Pathotroph-Saprotroph" "Saprotroph-Symbiotroph"
## [5] "Plant Pathogen" "Ectomycorrhizal"
## [7] "Arbuscular Mycorrhizal" "Pathotroph"
## [9] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"
## [11] "Symbiotroph" "Saprotroph-Saprotroph-Symbiotroph"
## [13] "Endophyte" "Pathotroph-Pathotroph-Saprotroph"
# change some names for FUNGuild
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Symbiotroph"] <- "Other Symbiotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Pathotroph"] <- "Other Pathotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Pathotroph-Pathotroph-Saprotroph"] <- "Pathotroph-Saprotroph"
FG_tax_table$FUNGuild[FG_tax_table$FUNGuild=="Saprotroph-Saprotroph-Symbiotroph"] <- "Saprotroph-Symbiotroph"
# and for trophicMode
FG_tax_table$trophicMode[FG_tax_table$trophicMode=="Pathotroph-Pathotroph-Saprotroph"] <- "Pathotroph-Saprotroph"
FG_tax_table$trophicMode[FG_tax_table$trophicMode=="Saprotroph-Saprotroph-Symbiotroph"] <- "Saprotroph-Symbiotroph"
# reorder
FG_tax_table <- FG_tax_table[, c(1:2, 11, 3:10)]
Check again
unique(FG_tax_table$trophicMode)
## [1] "Saprotroph" NA
## [3] "Pathotroph-Saprotroph" "Saprotroph-Symbiotroph"
## [5] "Pathotroph" "Symbiotroph"
## [7] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"
#unique(FG_tax_table$guild)
unique(FG_tax_table$FUNGuild)
## [1] "Saprotroph" NA
## [3] "Pathotroph-Saprotroph" "Saprotroph-Symbiotroph"
## [5] "Plant Pathogen" "Ectomycorrhizal"
## [7] "Arbuscular Mycorrhizal" "Other Pathotroph"
## [9] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"
## [11] "Other Symbiotroph" "Endophyte"
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
load('ps_FINAL')
ps_FG_HEATMAP <- phyloseq(otu_table(ps), tax_table(as.matrix(FG_tax_table)), sample_data(ps))
ps_FG_HEATMAP
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 11 taxonomic ranks ]
FG_tax <- as.data.frame(as.matrix(tax_table(ps_FG_HEATMAP)))
ps_FG_HEATMAP_nf <- subset_samples(ps_FG_HEATMAP, sample_type=="forest")
ps_FG_HEATMAP_nf
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 15 samples ]
## sample_data() Sample Data: [ 15 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 11 taxonomic ranks ]
meta_nf <- meta(ps_FG_HEATMAP_nf)
meta_nf$new_depth <- meta_nf$depth
meta_nf$new_depth <- gsub("...", "-", meta_nf$new_depth, fixed = TRUE)
meta_nf$new_depth[meta_nf$new_depth=="40-"] <- "40-80"
meta_nf$new_depth[meta_nf$new_depth=="0-10"] <- "0-10 cm"
meta_nf$new_depth[meta_nf$new_depth=="10-20"] <- "10-20 cm"
meta_nf$new_depth[meta_nf$new_depth=="20-30"] <- "20-30 cm"
meta_nf$new_depth[meta_nf$new_depth=="30-40"] <- "30-40 cm"
meta_nf$new_depth[meta_nf$new_depth=="40-80"] <- "40-80 cm"
ps_genus <- aggregate_rare(ps, level = 'genus', detection = 0/100, prevalence = 0/100)
ps_genus
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 943 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 943 taxa by 1 taxonomic ranks ]
# subset
ps_genus_NO_FOREST <- subset_samples(ps_genus, sample_type=="forest")
ps_genus_NO_FOREST
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 943 taxa and 15 samples ]
## sample_data() Sample Data: [ 15 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 943 taxa by 1 taxonomic ranks ]
meta_nf <- meta(ps_genus_NO_FOREST)
# RA-transform
ps_genus_NO_FOREST_RA <- microbiome::transform(ps_genus_NO_FOREST, 'compositional')
# Z-transform
ps_genus_NO_FOREST_RA_z <- microbiome::transform(ps_genus_NO_FOREST_RA, 'Z', log10 = FALSE)
# subset to only the 30 genera from above
hetamap.taxa <- taxa_names(ps_Heatmap)
data <- as(otu_table(ps_genus_NO_FOREST_RA_z), "matrix")
data <- as.data.frame(data)
data_subset <- data[(row.names(data) %in% hetamap.taxa),]
# now only 30 genuses, as should be
data_subset <- as.matrix(data_subset)
# add annotations "depth" and "soil management"
my_sample_col <- data.frame(meta_nf[c("depth", "sample_type")], row.names = row.names(meta_nf))
colnames(my_sample_col) <- c("depth", "soil management")
x <- as.data.frame(t(data_subset))
y <- as.data.frame(my_sample_col)
colnames(y) <- c("depth", "sample_type")
z <- dplyr::left_join(rownames_to_column(x), rownames_to_column(y), by=c("rowname" = "rowname"))
colnames(z)[1] <- "ID"
f <- z %>%
group_by(sample_type, depth) %>%
summarise_all(mean)
# remove ID column
f <- f[ , -3]
# make new ID column, which is the soiltypedepth
library(stringr)
f$ID <- str_c(f$sample_type, '_', f$depth)
# make new df with just sample type, depth and sampletypedepth
df <- f[c("sample_type", "depth", "ID")]
# remove extra columns from f
f2 <- f[, -c(1, 2, 33)]
# column into rownames
rownames(f2) <- f$ID
# make into numeric matrix
f3 <- data.matrix(f2, rownames.force = NA)
f4 <- t(f3)
# same for df
# remove extra columns from df
df2 <- df[ , -c(3)]
# column into rownames
rownames(df2) <- df$ID
my_sample_col2 <- df2
my_sample_col3 <- as.data.frame(my_sample_col2)
rownames(my_sample_col3) <- df$ID
# lets add annotations of samples
# add annotations "depth" and "soil_type" and change order
colnames(my_sample_col3) <- c("soil management", "depth")
my_sample_col3 <- my_sample_col3[, c(2,1)]
library("pheatmap")
# only keep the genus and FUNGuild
x <- FG_tax[, c(9, 3)]
# Remove duplicates by single column
FUNGuild_tax_table <- x[!duplicated(x$genus), ]
dim(FUNGuild_tax_table)
## [1] 943 2
# and genus as row names
rownames(FUNGuild_tax_table) <- NULL
FUNGuild_tax_table <- column_to_rownames(FUNGuild_tax_table, var = "genus")
# view data frame
unique(FUNGuild_tax_table$FUNGuild)
## [1] NA "Saprotroph-Symbiotroph"
## [3] "Arbuscular Mycorrhizal" "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Saprotroph" "Saprotroph"
## [7] "Endophyte" "Ectomycorrhizal"
## [9] "Other Pathotroph" "Plant Pathogen"
## [11] "Other Symbiotroph" "Pathotroph-Symbiotroph"
FUNGuild_tax_table$FUNGuild <- as.factor(FUNGuild_tax_table$FUNGuild)
# change level order
FUNGuild_tax_table$FUNGuild <- factor(FUNGuild_tax_table$FUNGuild, levels = c("Plant Pathogen", "Other Pathotroph", "Pathotroph-Saprotroph", "Pathotroph-Saprotroph-Symbiotroph", "Pathotroph-Symbiotroph", "Saprotroph", "Saprotroph-Symbiotroph", "Other Symbiotroph", "Ectomycorrhizal", "Endophyte", "Arbuscular Mycorrhizal"))
levels(FUNGuild_tax_table$FUNGuild)
## [1] "Plant Pathogen" "Other Pathotroph"
## [3] "Pathotroph-Saprotroph" "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Symbiotroph" "Saprotroph"
## [7] "Saprotroph-Symbiotroph" "Other Symbiotroph"
## [9] "Ectomycorrhizal" "Endophyte"
## [11] "Arbuscular Mycorrhizal"
#Create color palette
my_colour = list(
"soil management" = c(forest = "#1167b1"),
depth = c('0...10' = "#387212", '10...20' = "#ADC476", '20...30' = "#D8D2BA",'30...40' = "#907852", '40...' = "#6A4C3A"),
FUNGuild = c("Plant Pathogen" = "deeppink", "Other Pathotroph" = "#d6849a", "Pathotroph-Saprotroph" = "#e3adbc", "Pathotroph-Saprotroph-Symbiotroph" = "#f1d6dd", "Pathotroph-Symbiotroph" = "#faf1f4", "Saprotroph" = "#CBBEAD", "Saprotroph-Symbiotroph" = "darkseagreen", "Other Symbiotroph" = "lightgreen", "Ectomycorrhizal" = "darkgreen", "Endophyte" = "#A2CF31", "Arbuscular Mycorrhizal" = "darkolivegreen1"))
p3 <- as.ggplot(function() pheatmap(f4, cluster_cols = FALSE, cluster_rows = TRUE, annotation_col = my_sample_col3, annotation_colors = my_colour, color=colorRampPalette(c("navy", "white", "red"))(50), show_colnames = FALSE, legend = TRUE, annotation_row = FUNGuild_tax_table, border_color = NA, cellheight = 16, fontsize = 14, fontsize_row = 14, annotation_names_row = FALSE, annotation_names_col = FALSE))
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
library(metagMisc)
library(pheatmap)
library(metagMisc)
library(RColorBrewer)
library(viridis)
library(tidyverse)
library(ggpubr)
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
# Pick relative abundances (compositional) and sample metadata
ps_RA <- microbiome::transform(ps, "compositional")
load(file = 'ps_FG_with_NAs')#ps_FG
ps_FG
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 11 taxonomic ranks ]
FG_tax <- ps_FG %>% tax_table() %>% as.data.frame()
unique(FG_tax$trophicMode)
## [1] NA "Saprotroph-Symbiotroph"
## [3] "Symbiotroph" "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Saprotroph" "Pathotroph"
## [7] "Saprotroph" "Pathotroph-Symbiotroph"
Note! In the composition figure:
z <- FG_tax
z <- z %>%
mutate(FG = case_when(grepl("Ectomycorrhizal", guild) & trophicMode=="Symbiotroph" ~ "Ectomycorrhizal", grepl("Arbuscular", guild) ~ "Arbuscular Mycorrhizal", guild == "Endophyte" & trophicMode=="Symbiotroph" ~ "Endophyte", guild=="Plant Pathogen" & trophicMode=="Pathotroph"~ "Plant Pathogen"))
z <- z %>%
mutate(FUNGuild = case_when(FG == "Ectomycorrhizal" | FG == "Arbuscular Mycorrhizal" | FG == "Endophyte" | FG == "Plant Pathogen" ~ FG, guild != "Ectomycorrhizal" | FG != "Arbuscular Mycorrhizal" | FG != "Endophyte" | FG != "Plant Pathogen" ~ z$trophicMode))
# change some names for FUNGuild
z$FUNGuild[z$FUNGuild=="Symbiotroph"] <- "Other Symbiotroph"
z$FUNGuild[z$FUNGuild=="Pathotroph"] <- "Other Pathotroph"
# remove species and FG
tax <- z[, -c(10, 12)]
unique(tax$FUNGuild)
## [1] NA "Saprotroph-Symbiotroph"
## [3] "Arbuscular Mycorrhizal" "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Saprotroph" "Other Pathotroph"
## [7] "Saprotroph" "Endophyte"
## [9] "Ectomycorrhizal" "Pathotroph-Symbiotroph"
## [11] "Plant Pathogen" "Other Symbiotroph"
# rename FUNGuild to species
colnames(tax)[3] <- "species"
tax <- tax %>% as.matrix()
# reassign to phyloseq
x <- ps_FG
tax_table(x) <- tax_table(tax)
x <- aggregate_rare(x, level = "species", detection = 0, prevalence = 0)
x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 12 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 12 taxa by 2 taxonomic ranks ]
# 12 taxa and 140 samples
# lets not remove NAs!!
# remove "Unknown"
allTaxa = taxa_names(x)
badTaxa = c("Unknown")
myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
x <- prune_taxa(myTaxa, x)
x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 11 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 11 taxa by 2 taxonomic ranks ]
# 11 taxa and 140 samples
x_RA <- transform(x, 'compositional')
#create data table
df <- psmelt(x_RA)
sampletype_names <- list(
'forest' = "forest", 'meadow' = "meadow", 'organic' = "organic", 'conventional' = "conventional"
)
sampletype_labeller <- function(variable,value){
return(sampletype_names[value])
}
df$species <- factor(df$species)
levels(df$species)
## [1] "Arbuscular Mycorrhizal" "Ectomycorrhizal"
## [3] "Endophyte" "Other Pathotroph"
## [5] "Other Symbiotroph" "Pathotroph-Saprotroph"
## [7] "Pathotroph-Saprotroph-Symbiotroph" "Pathotroph-Symbiotroph"
## [9] "Plant Pathogen" "Saprotroph"
## [11] "Saprotroph-Symbiotroph"
# change level order
df$species <- factor(df$species, levels = c("Plant Pathogen", "Other Pathotroph", "Pathotroph-Saprotroph", "Pathotroph-Saprotroph-Symbiotroph", "Pathotroph-Symbiotroph", "Saprotroph", "Saprotroph-Symbiotroph", "Other Symbiotroph", "Ectomycorrhizal", "Endophyte", "Arbuscular Mycorrhizal"))
levels(FUNGuild_tax_table$FUNGuild)
## [1] "Plant Pathogen" "Other Pathotroph"
## [3] "Pathotroph-Saprotroph" "Pathotroph-Saprotroph-Symbiotroph"
## [5] "Pathotroph-Symbiotroph" "Saprotroph"
## [7] "Saprotroph-Symbiotroph" "Other Symbiotroph"
## [9] "Ectomycorrhizal" "Endophyte"
## [11] "Arbuscular Mycorrhizal"
#Create color palette
my_colour = list(
"soil management" = c(meadow = "#fbc02d", organic = "#8a8a8a", conventional = "#b71c1c"),
depth = c('0...10' = "#387212", '10...20' = "#ADC476", '20...30' = "#D8D2BA",'30...40' = "#907852", '40...' = "#6A4C3A"),
FUNGuild = c("Plant Pathogen" = "deeppink", "Other Pathotroph" = "#d6849a", "Pathotroph-Saprotroph" = "#e3adbc", "Pathotroph-Saprotroph-Symbiotroph" = "#f1d6dd", "Pathotroph-Symbiotroph" = "#faf1f4", "Saprotroph" = "#CBBEAD", "Saprotroph-Symbiotroph" = "darkseagreen", "Other Symbiotroph" = "lightgreen", "Ectomycorrhizal" = "darkgreen", "Endophyte" = "#A2CF31", "Arbuscular Mycorrhizal" = "darkolivegreen1"))
#Create color palette
cbbPalette_reduced <- c("deeppink", "#d6849a", "#e3adbc", "#f1d6dd", "#faf1f4", "#CBBEAD", "darkseagreen","lightgreen", "darkgreen", "#A2CF31", "darkolivegreen1")
# Make new depth variable
df$new_depth <- df$depth
df$new_depth <- gsub("...", "-", df$new_depth, fixed = TRUE)
df$new_depth[df$new_depth=="40-"] <- "40-80"
df$new_depth[df$new_depth=="0-10"] <- "0-10 cm"
df$new_depth[df$new_depth=="10-20"] <- "10-20 cm"
df$new_depth[df$new_depth=="20-30"] <- "20-30 cm"
df$new_depth[df$new_depth=="30-40"] <- "30-40 cm"
df$new_depth[df$new_depth=="40-80"] <- "40-80 cm"
FG <- ggplot(df, aes(x = new_depth ,y = Abundance, fill = species)) + geom_bar(stat="identity", position="fill") + scale_fill_manual(values = cbbPalette_reduced) + facet_grid (cols = vars(sample_type), labeller=sampletype_labeller) + geom_bar(stat="identity", position="fill") + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=14),
axis.title = element_text(size=18),
legend.text = element_text(size=16),
legend.title = element_text(size=18),
legend.spacing.y = unit(0, 'cm'),
legend.key.size = unit(0.8, 'cm'),
title = element_text(size=18))+ theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill = guide_legend(nrow = 4, title="")) + theme(strip.text.x = element_text(size = 22)) +
ylab(label = "Relative abundance") + theme(legend.position="top") + xlab("Depth")
FG
ps_RA <- microbiome::transform(ps, 'compositional')
ps_RA.class <- aggregate_rare(ps_RA, level = 'class', detection = 3/100, prevalence = 3/140, include.lowest = TRUE)
ps_RA.class
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 16 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 16 taxa by 2 taxonomic ranks ]
#create data table
ps_RA.class_df <- psmelt(ps_RA.class)
#Create color palette
#set color palette from RColorBrewer
# Define the number of colors you want
library("RColorBrewer") # nice color options
nb.cols = length(unique((as.data.frame(ps_RA.class@tax_table))$class))
cbbPalette <- colorRampPalette(brewer.pal(12, "Set3"))(nb.cols)
sampletype_names <- list(
'forest' = "forest", 'meadow' = "meadow", 'organic' = "organic", 'conventional' = "conventional"
)
sampletype_labeller <- function(variable,value){
return(sampletype_names[value])
}
# check unique values for class
unique(ps_RA.class_df$class)
## [1] "Leotiomycetes" "Agaricomycetes"
## [3] "Dothideomycetes" "Sordariomycetes"
## [5] "Archaeosporomycetes" "Mortierellomycetes"
## [7] "Geoglossomycetes" "Tremellomycetes"
## [9] "Ascomycota_unclassified" "Glomeromycetes"
## [11] "Microbotryomycetes" "Pezizomycetes"
## [13] "Other" "Eurotiomycetes"
## [15] "Basidiomycota_unclassified" "Orbiliomycetes"
is.factor(ps_RA.class_df$class)
## [1] FALSE
ps_RA.class_df$class <- as.factor(ps_RA.class_df$class)
levels(ps_RA.class_df$class)
## [1] "Agaricomycetes" "Archaeosporomycetes"
## [3] "Ascomycota_unclassified" "Basidiomycota_unclassified"
## [5] "Dothideomycetes" "Eurotiomycetes"
## [7] "Geoglossomycetes" "Glomeromycetes"
## [9] "Leotiomycetes" "Microbotryomycetes"
## [11] "Mortierellomycetes" "Orbiliomycetes"
## [13] "Other" "Pezizomycetes"
## [15] "Sordariomycetes" "Tremellomycetes"
ps_RA.class_df$class <- relevel(ps_RA.class_df$class, "Other")
cbbPalette <- c("#b2b2b2", "#8DD3C7", "#FFED6F", "#CAAEC5", "#F68378", "#8D6942", "#F3B962", "#BCD868", "#6E99BE", "#F0D1E1", "#C191C2", "#FFFFC6", "darkgreen", "#D0D9CD", "#8BC081", "#FF8DB5")
# plot with detection = 1/100, prevalence = 2/100
#Create a plot
classF <- ggplot(ps_RA.class_df, aes(x = depth ,y = Abundance, fill = class)) + geom_bar(stat="identity", position="fill") + scale_fill_manual(values = cbbPalette) + facet_grid (cols = vars(sample_type), labeller=sampletype_labeller) + geom_bar(stat="identity", position="fill") + scale_fill_manual(values = cbbPalette) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=14),
axis.title = element_text(size=18),
legend.text = element_text(size=16),
legend.title = element_text(size=18),
legend.spacing.y = unit(0, 'cm'),
legend.key.size = unit(0.8, 'cm'),
title = element_text(size=18))+ theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill = guide_legend(nrow = 6, title="")) + theme(strip.text.x = element_text(size = 22)) +
ylab(label = "Relative abundance") + theme(legend.position="top") +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.x = element_blank())
classF
ps_RA_phyla_aggr <- aggregate_rare(ps_RA, level = 'phylum', detection = 2/100, prevalence = 2/140)
ps_RA_phyla_aggr
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 6 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 6 taxa by 2 taxonomic ranks ]
#create data table
ps_RA_phyla_df <- psmelt(ps_RA_phyla_aggr)
#Create color palette
cbbPalette <- c("#666666","#1B9E77", "#D95F02", "#E7298A", "#7570B3", "#66A61E")
sampletype_names <- list(
'forest' = "forest", 'meadow' = "meadow", 'organic' = "organic", 'conventional' = "conventional"
)
sampletype_labeller <- function(variable,value){
return(sampletype_names[value])
}
# check unique values for phylum
unique(ps_RA_phyla_df$phylum)
## [1] "Ascomycota" "Basidiomycota" "Glomeromycota"
## [4] "Mortierellomycota" "Rozellomycota" "Other"
# [1] "Ascomycota" "Basidiomycota" "Glomeromycota" "Mortierellomycota"
# [5] "Rozellomycota" "Other"
ps_RA_phyla_df$phylum <- as.factor(ps_RA_phyla_df$phylum)
levels(ps_RA_phyla_df$phylum)
## [1] "Ascomycota" "Basidiomycota" "Glomeromycota"
## [4] "Mortierellomycota" "Other" "Rozellomycota"
ps_RA_phyla_df$phylum <- factor(ps_RA_phyla_df$phylum, levels = c("Other","Ascomycota", "Basidiomycota", "Glomeromycota", "Mortierellomycota", "Rozellomycota"))
levels(ps_RA_phyla_df$phylum)
## [1] "Other" "Ascomycota" "Basidiomycota"
## [4] "Glomeromycota" "Mortierellomycota" "Rozellomycota"
#Create a plot
phylumF <- ggplot(ps_RA_phyla_df, aes(x = depth ,y = Abundance, fill = phylum)) + geom_bar(stat="identity", position="fill") + scale_fill_manual(values = cbbPalette) + facet_grid (cols = vars(sample_type), labeller=sampletype_labeller) + geom_bar(stat="identity", position="fill") + scale_fill_manual(values = cbbPalette) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=14),
axis.title = element_text(size=18),
legend.text = element_text(size=16),
legend.title = element_text(size=18),
legend.spacing.y = unit(0, 'cm'),
legend.key.size = unit(0.8, 'cm'),
title = element_text(size=18))+ theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(fill = guide_legend(title="")) + theme(strip.text.x = element_text(size = 22)) +
ylab(label = "Relative abundance") + theme(legend.position="top") +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.x = element_blank())
phylumF
library(ggplotify)
left <- ggarrange(phylumF, classF , FG,
labels = c("A", "B", "C"),
ncol = 1, nrow = 3, heights = c(1.1, 1.5, 1.5))
left
right <- ggarrange(p2, p3,
labels = c("D", "E"),
ncol = 1, nrow = 2, heights = c(3, 1.3))
right
figure <- ggarrange(left, right,
ncol = 2, nrow = 1, heights = c(1, 1), widths = c(1, 1.2))
figure
Saved with width 2400 and height 2600
Note! While doing tests, always check the homogeneity of variance result and decide based on that which later test result to use (Anova and Tukey or Kruskal and Wilcoxon)
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
load(file = 'ps_FG_with_NAs')#ps_FG
ps_FG
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 11 taxonomic ranks ]
Note:
# remove species
z <- as.data.frame(tax_table(ps_FG))
tax <- z[, 1:9]
# rename FUNGuild to species
colnames(tax)[3] <- "species"
tax <- tax %>% as.matrix()
# reassign to phyloseq
x <- ps_FG
tax_table(x) <- tax_table(tax)
x <- aggregate_rare(x, level = "species", detection = 0, prevalence = 0)
x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 12 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 12 taxa by 2 taxonomic ranks ]
# 12 taxa and 140 samples
# remove "Unknown"
allTaxa = taxa_names(x)
badTaxa = c("Unknown")
myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
x <- prune_taxa(myTaxa, x)
x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 11 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 11 taxa by 2 taxonomic ranks ]
# 11 taxa and 140 samples
x_RA <- transform(x, 'compositional')
#create data table
FG_df <- psmelt(x_RA)
unique(FG_df$OTU)
## [1] "Saprotroph" "Ectomycorrhizal"
## [3] "Saprotroph-Symbiotroph" "Pathotroph-Saprotroph"
## [5] "Pathotroph-Symbiotroph" "Arbuscular Mycorrhizal"
## [7] "Endophyte" "Pathotroph-Saprotroph-Symbiotroph"
## [9] "Other Pathotroph" "Plant Pathogen"
## [11] "Other Symbiotroph"
taxa <- "Plant Pathogen"
# Construct a data.frame with the selected
# taxonomic group and grouping
# relative
df <- filter(FG_df, OTU == taxa)
library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ sample_type, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 5.1088 0.002218 **
## 136
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# first one-way ANOVA
res.aov <- aov(Abundance ~ sample_type, data = df)
# Summary of the analysis
summary(res.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 0.03572 0.011905 8.387 3.72e-05 ***
## Residuals 136 0.19304 0.001419
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(res.aov)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Abundance ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 0.009502785 -0.020166943 0.03917251 0.8386455
## organic-forest 0.040264954 0.010595226 0.06993468 0.0031516
## conventional-forest 0.038091975 0.008875247 0.06730870 0.0049921
## organic-meadow 0.030762169 0.008849623 0.05267471 0.0020795
## conventional-meadow 0.028589190 0.007294023 0.04988436 0.0035796
## conventional-organic -0.002172979 -0.023468146 0.01912219 0.9934267
# perform the Kruskal test
kruskal.test(Abundance ~ sample_type, data = df)
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 32.097, df = 3, p-value = 4.992e-07
pairwise.wilcox.test(df$Abundance, df$sample_type,
p.adjust.method = "BH")
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.33468 - -
## organic 0.00059 0.00022 -
## conventional 0.00022 9.5e-05 0.95084
##
## P value adjustment method: BH
x <- df %>%
group_by(sample_type) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
unique(FG_df$OTU)
## [1] "Saprotroph" "Ectomycorrhizal"
## [3] "Saprotroph-Symbiotroph" "Pathotroph-Saprotroph"
## [5] "Pathotroph-Symbiotroph" "Arbuscular Mycorrhizal"
## [7] "Endophyte" "Pathotroph-Saprotroph-Symbiotroph"
## [9] "Other Pathotroph" "Plant Pathogen"
## [11] "Other Symbiotroph"
taxa <- "Endophyte"
# Construct a data.frame with the selected
# taxonomic group and grouping
# relative
df <- filter(FG_df, OTU == taxa)
# Using leveneTest()
result = leveneTest(Abundance ~ sample_type, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 0.9477 0.4196
## 136
# first one-way ANOVA
res.aov <- aov(Abundance ~ sample_type, data = df)
# Summary of the analysis
summary(res.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 0.0286 0.009522 1.621 0.187
## Residuals 136 0.7988 0.005874
TukeyHSD(res.aov)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Abundance ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 0.004251549 -0.05610473 0.06460783 0.9978070
## organic-forest 0.036929219 -0.02342706 0.09728550 0.3868926
## conventional-forest 0.025133837 -0.03430092 0.08456859 0.6901727
## organic-meadow 0.032677670 -0.01189840 0.07725374 0.2300327
## conventional-meadow 0.020882288 -0.02243786 0.06420244 0.5938191
## conventional-organic -0.011795382 -0.05511553 0.03152477 0.8936888
# perform the Kruskal test
kruskal.test(Abundance ~ sample_type, data = df)
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 17.962, df = 3, p-value = 0.0004478
pairwise.wilcox.test(df$Abundance, df$sample_type,
p.adjust.method = "BH")
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.6097 - -
## organic 0.0129 0.0032 -
## conventional 0.0269 0.0073 0.9088
##
## P value adjustment method: BH
x <- df %>%
group_by(sample_type) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
# remove species
z <- as.data.frame(tax_table(ps_FG))
tax <- z[, 1:9]
# rename trophicmode to species
colnames(tax)[1] <- "species"
tax <- tax %>% as.matrix()
# reassign to phyloseq
x <- ps_FG
tax_table(x) <- tax_table(tax)
x <- aggregate_rare(x, level = "species", detection = 0, prevalence = 0)
x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 8 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 8 taxa by 1 taxonomic ranks ]
# 8 taxa and 140 samples
# remove "Unknown"
allTaxa = taxa_names(x)
badTaxa = c("Unknown")
myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
x <- prune_taxa(myTaxa, x)
x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 7 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 7 taxa by 1 taxonomic ranks ]
# 7 taxa and 140 samples
x_RA <- transform(x, 'compositional')
#create data table
FG_df <- psmelt(x_RA)
unique(FG_df$species)
## [1] "Saprotroph" "Symbiotroph"
## [3] "Saprotroph-Symbiotroph" "Pathotroph-Saprotroph"
## [5] "Pathotroph-Symbiotroph" "Pathotroph"
## [7] "Pathotroph-Saprotroph-Symbiotroph"
FG_df$species <- as.factor(FG_df$species)
for (i in levels(FG_df$species)) {
df <- filter(FG_df, species == i)
print(i)
result = leveneTest(Abundance ~ sample_type, df)
print(result)
k <- kruskal.test(Abundance ~ sample_type, data = df)
print(k)
w <- pairwise.wilcox.test(df$Abundance, df$sample_type,
p.adjust.method = "BH")
print(w)
res.aov <- aov(Abundance ~ sample_type, data = df)
aov <- summary(res.aov)
print(aov)
tukey <- TukeyHSD(res.aov)
print(tukey)
}
## [1] "Pathotroph"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 8.875 2.058e-05 ***
## 136
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 29.339, df = 3, p-value = 1.901e-06
##
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.08892 - -
## organic 0.00127 0.00127 -
## conventional 0.00066 0.00015 0.17096
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 0.2269 0.07562 11.46 9.54e-07 ***
## Residuals 136 0.8974 0.00660
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Abundance ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 0.01583120 -0.04814065 0.07980305 0.9175818
## organic-forest 0.08324801 0.01927616 0.14721985 0.0050968
## conventional-forest 0.10034692 0.03735180 0.16334205 0.0003455
## organic-meadow 0.06741681 0.02017046 0.11466315 0.0016868
## conventional-meadow 0.08451572 0.03860053 0.13043091 0.0000256
## conventional-organic 0.01709892 -0.02881627 0.06301411 0.7674770
##
## [1] "Pathotroph-Saprotroph"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 0.4128 0.7441
## 136
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 20.866, df = 3, p-value = 0.0001122
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.620 - -
## organic 0.473 0.092 -
## conventional 0.022 4e-05 0.025
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 0.166 0.05530 2.305 0.0797 .
## Residuals 136 3.263 0.02399
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Abundance ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest -0.01745120 -0.139437290 0.1045349 0.9823482
## organic-forest 0.02945479 -0.092531299 0.1514409 0.9228783
## conventional-forest 0.06811418 -0.052009412 0.1882378 0.4554546
## organic-meadow 0.04690599 -0.043186704 0.1369987 0.5302224
## conventional-meadow 0.08556539 -0.001988977 0.1731197 0.0580409
## conventional-organic 0.03865939 -0.048894968 0.1262138 0.6601975
##
## [1] "Pathotroph-Saprotroph-Symbiotroph"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 11.314 1.131e-06 ***
## 136
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 16.058, df = 3, p-value = 0.001104
##
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.120 - -
## organic 0.267 0.012 -
## conventional 0.483 0.001 0.177
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 0.0909 0.030312 7.509 0.000109 ***
## Residuals 136 0.5490 0.004037
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Abundance ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest -0.04266408 -0.092698462 0.007370300 0.1235555
## organic-forest 0.02369330 -0.026341081 0.073727681 0.6078475
## conventional-forest -0.01552221 -0.064792665 0.033748240 0.8451907
## organic-meadow 0.06635738 0.029404542 0.103310221 0.0000419
## conventional-meadow 0.02714187 -0.008769836 0.063053574 0.2060867
## conventional-organic -0.03921551 -0.075127218 -0.003303808 0.0264115
##
## [1] "Pathotroph-Symbiotroph"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 0.7863 0.5035
## 136
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 9.1683, df = 3, p-value = 0.02713
##
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.135 - -
## organic 0.931 0.133 -
## conventional 0.750 0.033 0.454
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 0.0105 0.003513 0.789 0.502
## Residuals 136 0.6056 0.004453
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Abundance ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 0.0148900730 -0.03766324 0.06744339 0.8820199
## organic-forest 0.0202069102 -0.03234641 0.07276023 0.7495680
## conventional-forest 0.0006628179 -0.05108811 0.05241375 0.9999867
## organic-meadow 0.0053168372 -0.03349636 0.04413003 0.9844347
## conventional-meadow -0.0142272551 -0.05194690 0.02349239 0.7604539
## conventional-organic -0.0195440923 -0.05726374 0.01817556 0.5343221
##
## [1] "Saprotroph"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.4939 0.219
## 136
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 29.348, df = 3, p-value = 1.893e-06
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.00015 - -
## organic 0.16698 1.7e-05 -
## conventional 0.04563 0.00027 0.35940
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 2.056 0.6855 13.22 1.26e-07 ***
## Residuals 136 7.052 0.0519
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Abundance ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 0.34542965 0.16610621 0.52475309 0.0000098
## organic-forest 0.07736653 -0.10195691 0.25668997 0.6765045
## conventional-forest 0.13565316 -0.04093236 0.31223868 0.1937636
## organic-meadow -0.26806312 -0.40050226 -0.13562398 0.0000032
## conventional-meadow -0.20977649 -0.33848420 -0.08106878 0.0002385
## conventional-organic 0.05828663 -0.07042108 0.18699433 0.6417935
##
## [1] "Saprotroph-Symbiotroph"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 0.4218 0.7376
## 136
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 0.97889, df = 3, p-value = 0.8064
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.94 - -
## organic 0.94 0.94 -
## conventional 0.94 0.94 0.94
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 0.042 0.01398 0.381 0.767
## Residuals 136 4.984 0.03665
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Abundance ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest -0.040917266 -0.19167857 0.10984404 0.8945909
## organic-forest 0.001318962 -0.14944234 0.15208027 0.9999957
## conventional-forest -0.010145065 -0.15860454 0.13831440 0.9979963
## organic-meadow 0.042236228 -0.06910837 0.15358083 0.7572807
## conventional-meadow 0.030772200 -0.07743530 0.13897970 0.8808802
## conventional-organic -0.011464028 -0.11967153 0.09674348 0.9926584
##
## [1] "Symbiotroph"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 10.346 3.531e-06 ***
## 136
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 15.613, df = 3, p-value = 0.001361
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.00084 - -
## organic 0.00277 0.27073 -
## conventional 0.00158 0.70169 0.70169
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 0.980 0.3268 11.88 5.87e-07 ***
## Residuals 136 3.741 0.0275
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Abundance ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest -0.27511837 -0.40573532 -0.14450143 0.0000012
## organic-forest -0.23528850 -0.36590544 -0.10467155 0.0000394
## conventional-forest -0.27910980 -0.40773248 -0.15048713 0.0000006
## organic-meadow 0.03982988 -0.05663713 0.13629688 0.7059692
## conventional-meadow -0.00399143 -0.09774051 0.08975765 0.9995118
## conventional-organic -0.04382131 -0.13757038 0.04992777 0.6179047
x <- FG_df %>% group_by(species, sample_type) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
x
## # A tibble: 28 × 4
## # Groups: species [7]
## species sample_type mean se
## <fct> <fct> <dbl> <dbl>
## 1 Pathotroph forest 0.0180 0.00720
## 2 Pathotroph meadow 0.0338 0.00750
## 3 Pathotroph organic 0.101 0.0170
## 4 Pathotroph conventional 0.118 0.0133
## 5 Pathotroph-Saprotroph forest 0.106 0.0351
## 6 Pathotroph-Saprotroph meadow 0.0889 0.0196
## 7 Pathotroph-Saprotroph organic 0.136 0.0265
## 8 Pathotroph-Saprotroph conventional 0.174 0.0257
## 9 Pathotroph-Saprotroph-Symbiotroph forest 0.0557 0.0304
## 10 Pathotroph-Saprotroph-Symbiotroph meadow 0.0131 0.00319
## # ℹ 18 more rows
ps_x <- subset_samples(ps_FG, sample_type=="forest" & depth=="40...")
ps_x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 3 samples ]
## sample_data() Sample Data: [ 3 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 11 taxonomic ranks ]
ps_FG_Tm <- aggregate_rare(ps_x, level = "trophicMode", detection = 0, prevalence = 0)
ps_FG_Tm
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 8 taxa and 3 samples ]
## sample_data() Sample Data: [ 3 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 8 taxa by 1 taxonomic ranks ]
# 8 taxa and 15 samples
# remove "Unknown"
allTaxa = taxa_names(ps_FG_Tm)
badTaxa = c("Unknown")
myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
ps_FG_Tm <- prune_taxa(myTaxa, ps_FG_Tm)
ps_FG_Tm
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 7 taxa and 3 samples ]
## sample_data() Sample Data: [ 3 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 7 taxa by 1 taxonomic ranks ]
# 7 taxa and 15 samples
ps_FG_Tm_RA <- microbiome::transform(ps_FG_Tm, "compositional")
FG_df <- psmelt(ps_FG_Tm_RA)
x <- FG_df %>%
group_by(OTU) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
x
## # A tibble: 7 × 3
## OTU mean se
## <chr> <dbl> <dbl>
## 1 Pathotroph 0.00516 0.00516
## 2 Pathotroph-Saprotroph 0.181 0.154
## 3 Pathotroph-Saprotroph-Symbiotroph 0.0185 0.0185
## 4 Pathotroph-Symbiotroph 0 0
## 5 Saprotroph 0.0193 0.0190
## 6 Saprotroph-Symbiotroph 0.0274 0.0272
## 7 Symbiotroph 0.748 0.141
Note all correlations are done without forest!
library("ggpubr")
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
load(file = 'ps_FG_with_NAs')#ps_FG
ps_FG
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 11 taxonomic ranks ]
# remove NAs from ps_FG
# this aggregation makes NAs into "Unknown"
ps_FG_x <- aggregate_rare(ps_FG, level = 'trophicMode', detection = 0/100, prevalence = 0/140)
ps_FG_x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 8 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 8 taxa by 1 taxonomic ranks ]
# 8 taxa
# remove "Unknown"
allTaxa = taxa_names(ps_FG_x)
badTaxa = c("Unknown")
myTaxa <- allTaxa[!(allTaxa %in% badTaxa)]
ps_FG_x_pruned <- prune_taxa(myTaxa, ps_FG_x)
ps_FG_x_pruned
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 7 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 7 taxa by 1 taxonomic ranks ]
# 7 taxa
FG_nf <- subset_samples(ps_FG_x_pruned, sample_type != "forest")
FG_nf
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 7 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 7 taxa by 1 taxonomic ranks ]
FG_RA_nf <- transform(FG_nf, "compositional")
FG_RA_nf
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 7 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 7 taxa by 1 taxonomic ranks ]
df <- psmelt(FG_RA_nf)
df$year <- "2019"
# sum the values if same genus: Sum by Group Based on dplyr Package
x <- df %>%
group_by(year, OTU) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
arrange(desc(mean)) %>%
slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups: year [1]
## year OTU mean se
## <chr> <chr> <dbl> <dbl>
## 1 2019 Saprotroph 0.422 0.0227
## 2 2019 Saprotroph-Symbiotroph 0.216 0.0168
## 3 2019 Pathotroph-Saprotroph 0.135 0.0143
## 4 2019 Pathotroph 0.0858 0.00824
## 5 2019 Symbiotroph 0.0855 0.0120
taxa <- y$OTU
for (i in taxa) {
df_x <- subset(df, OTU == i)
print(i)
x <- cor.test(df_x$Abundance, df_x$depth_numerical, method = "spearman", na.rm = TRUE)
print(x)
}
## [1] "Saprotroph"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 435672, p-value = 0.000113
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.3384715
##
## [1] "Saprotroph-Symbiotroph"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 312947, p-value = 0.6694
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.038566
##
## [1] "Pathotroph-Saprotroph"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 276512, p-value = 0.09387
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.1505015
##
## [1] "Pathotroph"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 525434, p-value = 2.576e-14
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.6142379
##
## [1] "Symbiotroph"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 223897, p-value = 0.0003944
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.3121434
ps_nf <- subset_samples(ps, sample_type != "forest")
ps_nf
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
ps_RA_nf <- transform(ps_nf, "compositional")
ps_RA_nf
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
ps_RA_nf_phy <- aggregate_rare(ps_RA_nf, level = "phylum", detection = 0.0, prevalence = 0.0)
ps_RA_nf_phy
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 14 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 14 taxa by 1 taxonomic ranks ]
df <- psmelt(ps_RA_nf_phy)
df$year <- "2019"
x <- df %>%
group_by(year, OTU) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
arrange(desc(mean)) %>%
slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups: year [1]
## year OTU mean se
## <chr> <chr> <dbl> <dbl>
## 1 2019 Ascomycota 0.692 0.0151
## 2 2019 Basidiomycota 0.199 0.0134
## 3 2019 Mortierellomycota 0.0793 0.00956
## 4 2019 Glomeromycota 0.0264 0.00683
## 5 2019 Chytridiomycota 0.00209 0.000362
taxa <- y$OTU
for (i in taxa) {
df_x <- subset(df, OTU == i)
print(i)
x <- cor.test(df_x$Abundance, df_x$depth_numerical, method = "spearman", na.rm = TRUE)
print(x)
}
## [1] "Ascomycota"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 298097, p-value = 0.3506
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.08418676
##
## [1] "Basidiomycota"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 491192, p-value = 1.347e-09
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.5090399
##
## [1] "Mortierellomycota"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 245027, p-value = 0.005442
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.24723
##
## [1] "Glomeromycota"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 257422, p-value = 0.01924
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.2091478
##
## [1] "Chytridiomycota"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 569974, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.7510735
ps_nf <- subset_samples(ps, sample_type != "forest")
ps_nf
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
ps_RA_nf <- transform(ps_nf, "compositional")
ps_RA_nf
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
ps_RA_nf_cla <- aggregate_rare(ps_RA_nf, level = "class", detection = 0.0, prevalence = 0.0)
ps_RA_nf_cla
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 65 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 65 taxa by 2 taxonomic ranks ]
df <- psmelt(ps_RA_nf_cla)
df$year <- "2019"
x <- df %>%
group_by(year, OTU) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
arrange(desc(mean)) %>%
slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups: year [1]
## year OTU mean se
## <chr> <chr> <dbl> <dbl>
## 1 2019 Leotiomycetes 0.291 0.0218
## 2 2019 Sordariomycetes 0.185 0.0131
## 3 2019 Dothideomycetes 0.137 0.0140
## 4 2019 Tremellomycetes 0.132 0.0111
## 5 2019 Mortierellomycetes 0.0793 0.00956
taxa <- y$OTU
for (i in taxa) {
df_x <- subset(df, OTU == i)
print(i)
x <- cor.test(df_x$Abundance, df_x$depth_numerical, method = "spearman", na.rm = TRUE)
print(x)
}
## [1] "Leotiomycetes"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 139957, p-value = 3.966e-12
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.5700243
##
## [1] "Sordariomycetes"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 426742, p-value = 0.0004147
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.3110364
##
## [1] "Dothideomycetes"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 548039, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.6836843
##
## [1] "Tremellomycetes"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 558143, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.7147252
##
## [1] "Mortierellomycetes"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 245027, p-value = 0.005442
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.24723
ps_nf <- subset_samples(ps, sample_type != "forest")
ps_nf
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
ps_RA_nf <- transform(ps_nf, "compositional")
ps_RA_nf
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
ps_RA_nf_gen <- aggregate_rare(ps_RA_nf, level = "genus", detection = 0.0, prevalence = 0.0)
ps_RA_nf_gen
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 895 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 895 taxa by 2 taxonomic ranks ]
df <- psmelt(ps_RA_nf_gen)
Note! Exclude the obes not classified at genus level!
df$year <- "2019"
x <- df %>%
group_by(year, OTU) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
arrange(desc(mean)) %>%
slice(1:6)
print(y)
## # A tibble: 6 × 4
## # Groups: year [1]
## year OTU mean se
## <chr> <chr> <dbl> <dbl>
## 1 2019 Leotiomycetes_unclassified 0.109 0.0168
## 2 2019 Saitozyma 0.0664 0.00702
## 3 2019 Pseudeurotium 0.0533 0.0122
## 4 2019 Paraphaeosphaeria 0.0516 0.0110
## 5 2019 Mortierella 0.0478 0.00662
## 6 2019 Solicoccozyma 0.0463 0.00584
We don’t want to test for Leotiomycetes_unclassified
taxa <- y$OTU
taxa[! taxa %in% c("Leotiomycetes_unclassified")]
## [1] "Saitozyma" "Pseudeurotium" "Paraphaeosphaeria"
## [4] "Mortierella" "Solicoccozyma"
for (i in taxa) {
df_x <- subset(df, OTU == i)
print(i)
x <- cor.test(df_x$Abundance, df_x$depth_numerical, method = "spearman", na.rm = TRUE)
print(x)
}
## [1] "Leotiomycetes_unclassified"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 186445, p-value = 6.745e-07
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.4272047
##
## [1] "Saitozyma"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 549794, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.6890738
##
## [1] "Pseudeurotium"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 329787, p-value = 0.8841
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.01316959
##
## [1] "Paraphaeosphaeria"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 565995, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.7388476
##
## [1] "Mortierella"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 239975, p-value = 0.003073
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.2627505
##
## [1] "Solicoccozyma"
##
## Spearman's rank correlation rho
##
## data: df_x$Abundance and df_x$depth_numerical
## S = 545184, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.6749137
library('phyloseq')
library("dplyr")
library("tibble")
library("microbiome")
library("tibble")
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 29 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
meta <- meta(ps)
OTU <- as.data.frame(otu_table(ps))
OTU <- t(OTU)
# calculate reads per sample:
OTU2 <- OTU
OTU2 <- as.data.frame(OTU2)
OTU2$reads <- rowSums(OTU2)
# add to meta
meta$reads <- OTU2$reads
rm(OTU2)
OTU <- as.matrix(OTU)
#convert an abundance matrix to a presence-absence matrix. (make any number greater than 0 into a 1)
OTU[OTU > 0] <- 1 #converts from abundance to P/A
OTU <- as.data.frame(OTU)
# calculate number of different otus in a sample
OTU$OTUs <- rowSums(OTU)
# add to meta
meta$OTUs <- OTU$OTUs
rm(OTU)
# save new meta
sample_data(ps) <- sample_data(meta)
save(ps, file = 'ps_FINAL')
mean(meta$OTUs)
## [1] 1480.243
# 1480.243
# mean values of OTUs in soil layers
x <- meta %>%
dplyr::group_by(depth) %>%
dplyr::summarise(OTUs_mean = mean(OTUs, na.rm = TRUE), OTUs_se = (sd(OTUs, na.rm = TRUE)/sqrt(length((OTUs)))))
print(paste("how many OTUs on average in each soil layer"))
## [1] "how many OTUs on average in each soil layer"
print(x)
## # A tibble: 5 × 3
## depth OTUs_mean OTUs_se
## <chr> <dbl> <dbl>
## 1 0...10 2184. 183.
## 2 10...20 2495. 134.
## 3 20...30 1900. 187.
## 4 30...40 607. 86.0
## 5 40... 215. 39.9
y <- meta %>%
dplyr::group_by(depth) %>%
dplyr::summarise(reads_mean = mean(reads, na.rm = TRUE), reads_se = (sd(reads, na.rm = TRUE)/sqrt(length((reads)))))
print(paste("how many reads on average in each soil layer"))
## [1] "how many reads on average in each soil layer"
print(y)
## # A tibble: 5 × 3
## depth reads_mean reads_se
## <chr> <dbl> <dbl>
## 1 0...10 80957. 4871.
## 2 10...20 95172. 6279.
## 3 20...30 97826. 8155.
## 4 30...40 102635. 15860.
## 5 40... 35534. 8482.
xy <- left_join(x, y, by="depth")
# save
write.csv2(xy, file = "OTUs_and_reads_in_depths.csv", row.names = FALSE)
library("metagMisc")
ps_x <- phyloseq_average(
ps,
avg_type = "arithmetic",
group = "depth",
drop_group_zero = FALSE,
verbose = FALSE,
progress = NULL
)
ps_x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 5 samples ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
# 20610 taxa and 5 samples
OTU <- as.data.frame(otu_table(ps_x))
OTU <- t(OTU)
OTU <- as.matrix(OTU)
#convert an abundance matrix to a presence-absence matrix. (make any number greater than 0 into a 1)
OTU[OTU > 0] <- 1 #converts from abundance to P/A
OTU <- as.data.frame(OTU)
# calculate number of different otus in a sample
OTU$OTUs <- rowSums(OTU)
OTU$depth <- rownames(OTU)
OTU <- OTU[, 20611:20612]
print(paste("how many OTUs in total in each soil layer"))
## [1] "how many OTUs in total in each soil layer"
print(OTU)
## OTUs depth
## 0...10 14737 0...10
## 10...20 16268 10...20
## 20...30 14763 20...30
## 30...40 5367 30...40
## 40... 2563 40...
library(car)
# Using leveneTest()
result = leveneTest(OTUs ~ depth, meta)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 16.055 8.907e-11 ***
## 135
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# not homogenous
# perform the Kruskal test
kruskal.test(OTUs ~ depth, data = meta)
##
## Kruskal-Wallis rank sum test
##
## data: OTUs by depth
## Kruskal-Wallis chi-squared = 91.593, df = 4, p-value < 2.2e-16
pairwise.wilcox.test(meta$OTUs, meta$depth,
p.adjust.method = "BH")
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: meta$OTUs and meta$depth
##
## 0...10 10...20 20...30 30...40
## 10...20 0.30647 - - -
## 20...30 0.33226 0.02782 - -
## 30...40 2.4e-09 8.9e-10 4.1e-07 -
## 40... 8.9e-10 8.9e-10 1.7e-09 0.00065
##
## P value adjustment method: BH
library("ggpubr")
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
load(file = 'ps_FG_with_NAs')#ps_FG
ps_FG
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 11 taxonomic ranks ]
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 31 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
meta <- meta(ps)
x_sub <- subset_taxa(ps_FG, trophicMode %in% c("Saprotroph"))
x_sub
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 4842 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 4842 taxa by 11 taxonomic ranks ]
# 4842 taxa and 140 samples
# Save output of `plot_richness` as a variable
richness <- plot_richness(x_sub, x = "sample_type", measures = c("Observed"), color = "depth")
# get the data into data frame
richness_df <- richness$data
# the observed richness is in the column "value", lets change this to saprotroph_richness
colnames(richness_df)[31] <- "saprotroph_richness"
# and lets remove the "variable" and "se" columns
richness_df <- subset(richness_df, select = -variable)
richness_df <- subset(richness_df, select = -se)
FG_richness <- richness_df
x_sub <- subset_taxa(ps_FG, trophicMode %in% c("Symbiotroph"))
x_sub
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 784 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 784 taxa by 11 taxonomic ranks ]
# 784 taxa and 140 samples
# Save output of `plot_richness` as a variable
richness <- plot_richness(x_sub, x = "sample_type", measures = c("Observed"), color = "depth")
# get the data
richness_df <- richness$data
# the observed richness is in the column "value", lets change this to symbiotroph_richness
colnames(richness_df)[31] <- "symbiotroph_richness"
# and lets remove extra columns
richness_df <- richness_df[, c(1, 31)]
# combine
FG_richness <- dplyr::left_join(FG_richness, richness_df, by = "sampleID")
FG_richness <- subset(FG_richness, select = -samples)
# sampleID into rownames
rownames(FG_richness) <- FG_richness[,1]
x_sub <- subset_taxa(ps_FG, trophicMode %in% c("Pathotroph"))
x_sub
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 1500 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 28 sample variables ]
## tax_table() Taxonomy Table: [ 1500 taxa by 11 taxonomic ranks ]
# 1500 taxa and 140 samples
# Save output of `plot_richness` as a variable
richness <- plot_richness(x_sub, x = "sample_type", measures = c("Observed"), color = "depth")
# get the data
richness_df <- richness$data
# the observed richness is in the column "value", lets change this to pathotroph_richness
colnames(richness_df)[31] <- "pathotroph_richness"
# and lets remove extra columns
richness_df <- richness_df[, c(1, 31)]
# combine
FG_richness <- dplyr::left_join(FG_richness, richness_df, by = "sampleID")
# sampleID into rownames
rownames(FG_richness) <- FG_richness[,1]
Note! AMF richness calculated from FUNGuild is exactly the same (not shown here)!
x_sub <- subset_taxa(ps, phylum %in% c("Glomeromycota"))
x_sub
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 263 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 31 sample variables ]
## tax_table() Taxonomy Table: [ 263 taxa by 7 taxonomic ranks ]
# 263 taxa and 140 samples
# Save output of `plot_richness` as a variable
richness <- plot_richness(x_sub, x = "sample_type", measures = c("Observed"), color = "depth")
# get the data
richness_df <- richness$data
# the observed richness is in the column "value", lets change this to AMF_richness
colnames(richness_df)[34] <- "AMF_richness"
# and lets remove extra columns
richness_df <- richness_df[, c(1, 34)]
# combine
FG_richness <- dplyr::left_join(FG_richness, richness_df, by = "sampleID")
# sampleID into rownames
rownames(FG_richness) <- FG_richness[,1]
# I actually want the meta data from the ps rather than the ps_FG, so leyt's remove extra columns
FG_richness <- FG_richness[, c(1, 29:32)]
meta <- dplyr::left_join(meta, FG_richness, by = "sampleID")
# sampleID into rownames
rownames(meta) <- meta[,1]
sample_data(ps) <- sample_data(meta)
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
save(ps, file = 'ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 35 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
# create your own color palette for sample types
MyPalette <- c(forest = "#1167b1", meadow = "#fbc02d", organic = "#8a8a8a", conventional = "#b71c1c")
# OTU richness
OTU_rich <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(observed, na.rm = TRUE), se = (sd(observed, na.rm = TRUE)/sqrt(length((observed))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=3, position=position_dodge(1.2)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=16),
axis.title = element_text(size=16),
legend.text = element_text(size=16),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="Fungal richness") +
labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
print(OTU_rich)
Change to thousands of reads (x1000)
rich_k <- meta %>% dplyr::mutate(richness_k = observed/1000 ) %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(richness_k, na.rm = TRUE), se = (sd(richness_k, na.rm = TRUE)/sqrt(length((richness_k))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=3, position=position_dodge(1.2)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=16),
axis.title = element_text(size=16),
legend.text = element_text(size=16),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="Fungal richness \n (x1000)") +
labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
rich_k
gm_rich <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(AMF_richness, na.rm = TRUE), se = (sd(AMF_richness, na.rm = TRUE)/sqrt(length((AMF_richness))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=3, position=position_dodge(1.2)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=16),
axis.title = element_text(size=16),
legend.text = element_text(size=16),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="AMF richness \n") +
labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
gm_rich
Saprotroph_rich <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(saprotroph_richness, na.rm = TRUE), se = (sd(saprotroph_richness, na.rm = TRUE)/sqrt(length((saprotroph_richness))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=3, position=position_dodge(1.2)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=16),
axis.title = element_text(size=16),
legend.text = element_text(size=16),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="Saprotroph richness \n") +
labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
Saprotroph_rich
symb_rich <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(symbiotroph_richness, na.rm = TRUE), se = (sd(symbiotroph_richness, na.rm = TRUE)/sqrt(length((symbiotroph_richness))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=3, position=position_dodge(1.2)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=16),
axis.title = element_text(size=16),
legend.text = element_text(size=16),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="Symbiotroph richness \n") +
labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
symb_rich
path_rich <- meta %>%
group_by(sample_type, depth_numerical) %>%
summarise(mean = mean(pathotroph_richness, na.rm = TRUE), se = (sd(pathotroph_richness, na.rm = TRUE)/sqrt(length((pathotroph_richness))))) %>%
ungroup() %>%
ggplot(aes(y=mean, depth_numerical, color=sample_type)) +
geom_line(linetype = "dashed") +
geom_point(size=3, position=position_dodge(1.2)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.5, position=position_dodge(1.2)) + coord_flip() + scale_x_reverse() + theme(plot.title = element_text(size = 20, hjust = 0.5)) + theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5)) + theme_cowplot() +
theme(axis.text = element_text(size=16),
axis.title = element_text(size=16),
legend.text = element_text(size=16),
legend.title = element_text(size=16),
title = element_text(size=18)) +
scale_y_continuous(name="Pathotroph richness \n") +
labs(x = "depth") + theme(legend.title = element_blank()) + scale_colour_manual(values = MyPalette)
path_rich
figure <- ggarrange(rich_k, gm_rich + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"), Saprotroph_rich + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"), path_rich + rremove("ylab") + rremove("y.text") + rremove("y.axis") + rremove("y.ticks"),
labels = c("A", "B", "C", "D"),
ncol = 4, nrow = 1, common.legend = TRUE, legend = "right")
figure
library("multcomp")
Change depth and richness measure accordingly
Test these: - observed - AMF_richness - saprotroph_richness - pathotroph_richness
library(car)
means_and_ses <- list()
meta$depth <- as.factor(meta$depth)
rich <-c("observed", "AMF_richness", "saprotroph_richness", "pathotroph_richness")
for (i in rich) {
for (j in levels(meta$depth)) {
df <- filter(meta, depth==j)
print(i)
print(j)
# Levene's test for homogeneity of variance
result <- leveneTest(df[[i]] ~ sample_type, data = df)
print(result)
# Kruskal-Wallis test
k <- kruskal.test(df[[i]] ~ sample_type, data = df)
print(k)
# Pairwise Wilcoxon test
w <- pairwise.wilcox.test(df[[i]], df$sample_type, p.adjust.method = "BH")
print(w)
# ANOVA
res.aov <- aov(df[[i]] ~ sample_type, data = df)
aov_summary <- summary(res.aov)
print(aov_summary)
# Tukey's HSD test
tukey <- TukeyHSD(res.aov)
print(tukey)
# Group by and summarize
mean_and_se <- df %>%
group_by(sample_type) %>%
summarise(mean = mean(!!sym(i), na.rm = TRUE), se = sd(!!sym(i), na.rm = TRUE) / sqrt(n()))
# Store the result in the list with a descriptive name
result_name <- paste("depth", j, "diversity", i, sep = "_")
means_and_ses[[result_name]] <- mean_and_se
}
}
## [1] "observed"
## [1] "0...10"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.0589 0.3849
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 17.971, df = 3, p-value = 0.0004458
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.59636 - -
## organic 0.03636 0.00093 -
## conventional 0.03636 0.00093 0.67297
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 17009022 5669674 16.24 5.61e-06 ***
## Residuals 24 8379531 349147
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest -345.2917 -1448.8240 758.2407 0.8235682
## organic-forest 1417.2083 313.6760 2520.7407 0.0084047
## conventional-forest 1246.1111 159.4276 2332.7947 0.0204125
## organic-meadow 1762.5000 947.4873 2577.5127 0.0000209
## conventional-meadow 1591.4028 799.3528 2383.4527 0.0000593
## conventional-organic -171.0972 -963.1472 620.9527 0.9323217
##
## [1] "observed"
## [1] "10...20"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.6778 0.1983
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 9.1193, df = 3, p-value = 0.02775
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.200 - -
## organic 0.068 0.459 -
## conventional 0.068 0.068 0.541
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 4612134 1537378 4.074 0.0179 *
## Residuals 24 9056485 377354
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 611.9167 -535.3253 1759.159 0.4695196
## organic-forest 1043.5417 -103.7003 2190.784 0.0838551
## conventional-forest 1288.4444 158.7186 2418.170 0.0212276
## organic-meadow 431.6250 -415.6694 1278.919 0.5083506
## conventional-meadow 676.5278 -146.8943 1499.950 0.1343587
## conventional-organic 244.9028 -578.5193 1068.325 0.8442175
##
## [1] "observed"
## [1] "20...30"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 2.0857 0.1287
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 4.6562, df = 3, p-value = 0.1988
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.15 - -
## organic 0.25 0.96 -
## conventional 0.42 0.96 0.72
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 4742212 1580737 1.755 0.183
## Residuals 24 21619303 900804
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 1334.2083 -438.3317 3106.7484 0.1894002
## organic-forest 1381.5833 -390.9567 3154.1234 0.1662604
## conventional-forest 1032.6667 -712.8101 2778.1435 0.3804286
## organic-meadow 47.3750 -1261.7326 1356.4826 0.9996332
## conventional-meadow -301.5417 -1573.7656 970.6822 0.9131616
## conventional-organic -348.9167 -1621.1406 923.3072 0.8729624
##
## [1] "observed"
## [1] "30...40"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 2.6803 0.06959 .
## 24
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 3.7523, df = 3, p-value = 0.2895
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.46 - -
## organic 0.46 0.46 -
## conventional 0.46 0.46 0.67
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 986615 328872 1.716 0.19
## Residuals 24 4599845 191660
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 624.41667 -193.1945 1442.0279 0.1795937
## organic-forest 289.91667 -527.6945 1107.5279 0.7630092
## conventional-forest 342.44444 -462.6834 1147.5723 0.6489479
## organic-meadow -334.50000 -938.3459 269.3459 0.4370977
## conventional-meadow -281.97222 -868.8049 304.8605 0.5563050
## conventional-organic 52.52778 -534.3049 639.3605 0.9945665
##
## [1] "observed"
## [1] "40..."
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.3401 0.2848
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 10.863, df = 3, p-value = 0.01249
##
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.153 - -
## organic 0.056 0.056 -
## conventional 0.175 0.963 0.056
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 407159 135720 4.089 0.0177 *
## Residuals 24 796553 33190
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 114.75000 -225.487851 454.98785 0.7889685
## organic-forest 337.87500 -2.362851 678.11285 0.0520810
## conventional-forest 77.11111 -257.931964 412.15419 0.9197120
## organic-meadow 223.12500 -28.157306 474.40731 0.0944700
## conventional-meadow -37.63889 -281.841396 206.56362 0.9736063
## conventional-organic -260.76389 -504.966396 -16.56138 0.0333018
##
## [1] "AMF_richness"
## [1] "0...10"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 0.4636 0.7103
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 8.1971, df = 3, p-value = 0.04211
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.33 - -
## organic 0.13 0.56 -
## conventional 0.13 0.13 0.15
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 664.5 221.5 3.238 0.0398 *
## Residuals 24 1641.6 68.4
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 6.708333 -8.7373996 22.15407 0.6339029
## organic-forest 9.083333 -6.3623996 24.52907 0.3855925
## conventional-forest 15.555556 0.3456492 30.76546 0.0437049
## organic-meadow 2.375000 -9.0324298 13.78243 0.9387885
## conventional-meadow 8.847222 -2.2388068 19.93325 0.1515032
## conventional-organic 6.472222 -4.6138068 17.55825 0.3918515
##
## [1] "AMF_richness"
## [1] "10...20"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 0.1141 0.951
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 7.4387, df = 3, p-value = 0.05916
##
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.076 - -
## organic 0.170 0.442 -
## conventional 0.076 0.699 0.433
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 894 298.00 4.384 0.0135 *
## Residuals 24 1631 67.97
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 18.5833333 3.186290 33.98038 0.0139049
## organic-forest 12.4583333 -2.938710 27.85538 0.1432253
## conventional-forest 17.7777778 2.615818 32.93974 0.0173327
## organic-meadow -6.1250000 -17.496470 5.24647 0.4611598
## conventional-meadow -0.8055556 -11.856638 10.24553 0.9970402
## conventional-organic 5.3194444 -5.731638 16.37053 0.5548799
##
## [1] "AMF_richness"
## [1] "20...30"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 2.1721 0.1176
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 10.239, df = 3, p-value = 0.01664
##
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.056 - -
## organic 0.056 0.154 -
## conventional 0.404 0.068 0.402
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 2273 757.8 4.859 0.00883 **
## Residuals 24 3743 156.0
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 28.708333 5.385511 52.0311555 0.0119094
## organic-forest 18.083333 -5.239489 41.4061555 0.1696484
## conventional-forest 11.222222 -11.744505 34.1889499 0.5427300
## organic-meadow -10.625000 -27.850046 6.6000457 0.3447892
## conventional-meadow -17.486111 -34.225847 -0.7463756 0.0383380
## conventional-organic -6.861111 -23.600847 9.8786244 0.6746486
##
## [1] "AMF_richness"
## [1] "30...40"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 2.2858 0.1044
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 9.515, df = 3, p-value = 0.02317
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.073 - -
## organic 0.643 0.228 -
## conventional 0.926 0.023 0.643
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 3828 1275.9 5.884 0.00369 **
## Residuals 24 5204 216.8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 26.875000 -0.6265144 54.376514 0.0571204
## organic-forest 10.375000 -17.1265144 37.876514 0.7276472
## conventional-forest -1.777778 -28.8593971 25.303842 0.9978313
## organic-meadow -16.500000 -36.8112145 3.811214 0.1408901
## conventional-meadow -28.652778 -48.3917302 -8.913825 0.0027413
## conventional-organic -12.152778 -31.8917302 7.586175 0.3463849
##
## [1] "AMF_richness"
## [1] "40..."
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.591 0.2176
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 4.4541, df = 3, p-value = 0.2164
##
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.36 - -
## organic 0.36 0.83 -
## conventional 0.51 0.36 0.36
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 267.9 89.31 1.488 0.243
## Residuals 24 1440.9 60.04
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 6.0416667 -8.429273 20.512607 0.6619918
## organic-forest 7.4166667 -7.054273 21.887607 0.5032636
## conventional-forest 0.7777778 -13.472219 15.027774 0.9987481
## organic-meadow 1.3750000 -9.312497 12.062497 0.9842975
## conventional-meadow -5.2638889 -15.650269 5.122492 0.5126347
## conventional-organic -6.6388889 -17.025269 3.747492 0.3148737
##
## [1] "saprotroph_richness"
## [1] "0...10"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.236 0.3185
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 10.335, df = 3, p-value = 0.01592
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.921 - -
## organic 0.170 0.046 -
## conventional 0.283 0.046 0.283
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 552978 184326 5.106 0.00711 **
## Residuals 24 866367 36099
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 22.66667 -332.1680785 377.5014 0.9980002
## organic-forest 354.66667 -0.1680785 709.5014 0.0501395
## conventional-forest 219.11111 -130.3059916 568.5282 0.3308818
## organic-meadow 332.00000 69.9371730 594.0628 0.0094204
## conventional-meadow 196.44444 -58.2348452 451.1237 0.1730235
## conventional-organic -135.55556 -390.2348452 119.1237 0.4713122
##
## [1] "saprotroph_richness"
## [1] "10...20"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.3935 0.2689
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 8.4841, df = 3, p-value = 0.037
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.073 - -
## organic 0.170 0.193 -
## conventional 0.073 0.185 0.888
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 368992 122997 3.91 0.0209 *
## Residuals 24 754986 31458
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 405.16667 73.92528 736.40806 0.0125214
## organic-forest 249.79167 -81.44972 581.03306 0.1881316
## conventional-forest 260.11111 -66.07286 586.29508 0.1519732
## organic-meadow -155.37500 -400.01298 89.26298 0.3201964
## conventional-meadow -145.05556 -382.80094 92.68983 0.3540753
## conventional-organic 10.31944 -227.42594 248.06483 0.9993682
##
## [1] "saprotroph_richness"
## [1] "20...30"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.4484 0.2535
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 9.2572, df = 3, p-value = 0.02606
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.036 - -
## organic 0.267 0.292 -
## conventional 0.447 0.036 0.541
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 784367 261456 3.683 0.0259 *
## Residuals 24 1703653 70986
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 542.7083 45.12498 1040.29168 0.0289462
## organic-forest 358.2083 -139.37502 855.79168 0.2210832
## conventional-forest 230.3333 -259.65288 720.31955 0.5737676
## organic-meadow -184.5000 -551.98966 182.98966 0.5204628
## conventional-meadow -312.3750 -669.51075 44.76075 0.1015303
## conventional-organic -127.8750 -485.01075 229.26075 0.7576950
##
## [1] "saprotroph_richness"
## [1] "30...40"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.706 0.1924
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 6.5626, df = 3, p-value = 0.08723
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.388 - -
## organic 0.864 0.249 -
## conventional 0.864 0.091 0.482
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 192644 64215 3.255 0.0392 *
## Residuals 24 473414 19726
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 186.708333 -75.59003 449.006696 0.2294520
## organic-forest 2.083333 -260.21503 264.381696 0.9999961
## conventional-forest 5.111111 -253.18246 263.404684 0.9999398
## organic-meadow -184.625000 -378.34518 9.095179 0.0656538
## conventional-meadow -181.597222 -369.85940 6.664952 0.0614447
## conventional-organic 3.027778 -185.23440 191.289952 0.9999677
##
## [1] "saprotroph_richness"
## [1] "40..."
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 0.8324 0.4892
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 3.6785, df = 3, p-value = 0.2983
##
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.39 - -
## organic 0.39 0.76 -
## conventional 0.39 0.47 0.81
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 9406 3135 0.892 0.459
## Residuals 24 84343 3514
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 52.66667 -58.04632 163.37965 0.5643329
## organic-forest 47.29167 -63.42132 158.00465 0.6458956
## conventional-forest 20.00000 -89.02261 129.02261 0.9568518
## organic-meadow -5.37500 -87.14196 76.39196 0.9978224
## conventional-meadow -32.66667 -112.12987 46.79653 0.6726275
## conventional-organic -27.29167 -106.75487 52.17153 0.7798186
##
## [1] "pathotroph_richness"
## [1] "0...10"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 3.5826 0.02853 *
## 24
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 19.378, df = 3, p-value = 0.0002283
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.93091 - -
## organic 0.01818 0.00047 -
## conventional 0.01818 0.00047 1.00000
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 404412 134804 25.49 1.23e-07 ***
## Residuals 24 126933 5289
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest -11.041667 -146.86103 124.7777 0.9959110
## organic-forest 234.833333 99.01397 370.6527 0.0004079
## conventional-forest 240.666667 106.92100 374.4123 0.0002508
## organic-meadow 245.875000 145.56575 346.1843 0.0000031
## conventional-meadow 251.708333 154.22526 349.1914 0.0000013
## conventional-organic 5.833333 -91.64974 103.3164 0.9983535
##
## [1] "pathotroph_richness"
## [1] "10...20"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 2.2339 0.1102
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 21.058, df = 3, p-value = 0.0001024
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.02909 - -
## organic 0.01818 0.00047 -
## conventional 0.01818 0.00047 0.13879
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 298602 99534 29.77 2.95e-08 ***
## Residuals 24 80239 3343
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 61.62500 -46.36138 169.6114 0.4114727
## organic-forest 219.25000 111.26362 327.2364 0.0000513
## conventional-forest 274.66667 168.32903 381.0043 0.0000013
## organic-meadow 157.62500 77.87177 237.3782 0.0000742
## conventional-meadow 213.04167 135.53546 290.5479 0.0000005
## conventional-organic 55.41667 -22.08954 132.9229 0.2261312
##
## [1] "pathotroph_richness"
## [1] "20...30"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 2.2613 0.1071
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 8.05, df = 3, p-value = 0.04499
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.073 - -
## organic 0.073 0.157 -
## conventional 0.104 0.386 0.888
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 93092 31031 2.547 0.0797 .
## Residuals 24 292356 12182
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 75.58333 -130.54194 281.7086 0.7443125
## organic-forest 171.70833 -34.41694 377.8336 0.1266336
## conventional-forest 157.66667 -45.31147 360.6448 0.1684675
## organic-meadow 96.12500 -56.10861 248.3586 0.3250720
## conventional-meadow 82.08333 -65.86114 230.0278 0.4357382
## conventional-organic -14.04167 -161.98614 133.9028 0.9935429
##
## [1] "pathotroph_richness"
## [1] "30...40"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.1511 0.3488
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 5.8552, df = 3, p-value = 0.1189
##
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.18 - -
## organic 0.49 0.75 -
## conventional 0.18 0.18 0.47
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 1589 529.8 1.81 0.172
## Residuals 24 7024 292.7
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 11.208333 -20.742112 43.15878 0.7687688
## organic-forest 15.958333 -15.992112 47.90878 0.5247314
## conventional-forest 24.444444 -7.018179 55.90707 0.1683269
## organic-meadow 4.750000 -18.846968 28.34697 0.9441916
## conventional-meadow 13.236111 -9.696019 36.16824 0.4016916
## conventional-organic 8.486111 -14.446019 31.41824 0.7390336
##
## [1] "pathotroph_richness"
## [1] "40..."
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.33 0.2879
## 24
##
## Kruskal-Wallis rank sum test
##
## data: df[[i]] by sample_type
## Kruskal-Wallis chi-squared = 7.1979, df = 3, p-value = 0.06585
##
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df[[i]] and df$sample_type
##
## forest meadow organic
## meadow 0.083 - -
## organic 0.083 0.665 -
## conventional 0.083 0.727 0.665
##
## P value adjustment method: BH
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 3 114.1 38.03 1.368 0.276
## Residuals 24 666.9 27.79
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = df[[i]] ~ sample_type, data = df)
##
## $sample_type
## diff lwr upr p adj
## meadow-forest 4.166667 -5.678021 14.011355 0.6524360
## organic-forest 6.166667 -3.678021 16.011355 0.3317956
## conventional-forest 2.111111 -7.583267 11.805490 0.9308262
## organic-meadow 2.000000 -5.270784 9.270784 0.8719962
## conventional-meadow -2.055556 -9.121487 5.010376 0.8525672
## conventional-organic -4.055556 -11.121487 3.010376 0.4065242
# Combine list into a single dataframe
combined_df <- bind_rows(lapply(names(means_and_ses), function(name) {
df <- means_and_ses[[name]]
df$result_name <- name
return(df)
}), .id = "id")
# View the combined dataframe
print(combined_df)
## # A tibble: 80 × 5
## id sample_type mean se result_name
## <chr> <fct> <dbl> <dbl> <chr>
## 1 1 forest 1478. 438. depth_0...10_diversity_observed
## 2 1 meadow 1132. 158. depth_0...10_diversity_observed
## 3 1 organic 2895. 242. depth_0...10_diversity_observed
## 4 1 conventional 2724. 188. depth_0...10_diversity_observed
## 5 2 forest 1608. 93.0 depth_10...20_diversity_observed
## 6 2 meadow 2220. 219. depth_10...20_diversity_observed
## 7 2 organic 2652. 265. depth_10...20_diversity_observed
## 8 2 conventional 2897. 181. depth_10...20_diversity_observed
## 9 3 forest 792. 249. depth_20...30_diversity_observed
## 10 3 meadow 2126. 226. depth_20...30_diversity_observed
## # ℹ 70 more rows
# save the mean and se values
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
write.csv2(combined_df, file = "Richness_mean_and_ses.csv")
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 35 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
ps_RA <- transform(ps, "compositional")
meta <- meta(ps)
ps_RA_x <- aggregate_rare(ps_RA, level = "phylum", detection = 0, prevalence = 0)
ps_RA_x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 14 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 35 sample variables ]
## tax_table() Taxonomy Table: [ 14 taxa by 1 taxonomic ranks ]
# 14 taxa and 140 samples
#create data table
x_df <- psmelt(ps_RA_x)
x_df$year <- "2019"
x <- x_df %>%
group_by(year, OTU) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
arrange(desc(mean)) %>%
slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups: year [1]
## year OTU mean se
## <chr> <chr> <dbl> <dbl>
## 1 2019 Ascomycota 0.661 0.0163
## 2 2019 Basidiomycota 0.235 0.0159
## 3 2019 Mortierellomycota 0.0759 0.00873
## 4 2019 Glomeromycota 0.0240 0.00613
## 5 2019 Rozellomycota 0.00228 0.00109
library(car)
taxa <- y$OTU
for (i in taxa) {
df <- filter(x_df, OTU == i)
print(i)
# Using leveneTest()
result = leveneTest(Abundance ~ sample_type, df)
# print the result
print(result)
# perform the Kruskal test
k <- kruskal.test(Abundance ~ sample_type, data = df)
print(k)
w <- pairwise.wilcox.test(df$Abundance, df$sample_type,
p.adjust.method = "BH")
print(w)
}
## [1] "Ascomycota"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.5105 0.2146
## 136
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 27.535, df = 3, p-value = 4.546e-06
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 5.9e-05 - -
## organic 2.2e-07 0.42 -
## conventional 1.4e-07 0.35 0.70
##
## P value adjustment method: BH
## [1] "Basidiomycota"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.2681 0.2879
## 136
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 26.818, df = 3, p-value = 6.428e-06
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 1.1e-05 - -
## organic 1.1e-05 0.52 -
## conventional 2.2e-07 0.48 0.21
##
## P value adjustment method: BH
## [1] "Mortierellomycota"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.6242 0.1867
## 136
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 7.126, df = 3, p-value = 0.06799
##
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.902 - -
## organic 0.902 0.902 -
## conventional 0.180 0.180 0.076
##
## P value adjustment method: BH
## [1] "Glomeromycota"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.0182 0.3867
## 136
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 5.8342, df = 3, p-value = 0.12
##
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.14 - -
## organic 0.27 0.36 -
## conventional 0.30 0.27 0.91
##
## P value adjustment method: BH
## [1] "Rozellomycota"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 4.5426 0.004562 **
## 136
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 11.375, df = 3, p-value = 0.00986
##
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.120 - -
## organic 0.058 0.120 -
## conventional 0.436 0.120 0.041
##
## P value adjustment method: BH
x <- x_df %>%
filter(OTU %in% taxa) %>% group_by(OTU, sample_type) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 20 × 4
## # Groups: OTU [5]
## OTU sample_type mean se
## <chr> <fct> <dbl> <dbl>
## 1 Ascomycota forest 0.402 0.0486
## 2 Ascomycota meadow 0.670 0.0293
## 3 Ascomycota organic 0.692 0.0264
## 4 Ascomycota conventional 0.711 0.0231
## 5 Basidiomycota forest 0.533 0.0555
## 6 Basidiomycota meadow 0.210 0.0261
## 7 Basidiomycota organic 0.220 0.0247
## 8 Basidiomycota conventional 0.170 0.0191
## 9 Glomeromycota forest 0.00368 0.00177
## 10 Glomeromycota meadow 0.0325 0.0121
## 11 Glomeromycota organic 0.0333 0.0158
## 12 Glomeromycota conventional 0.0149 0.00690
## 13 Mortierellomycota forest 0.0470 0.0158
## 14 Mortierellomycota meadow 0.0860 0.0195
## 15 Mortierellomycota organic 0.0522 0.0120
## 16 Mortierellomycota conventional 0.0975 0.0167
## 17 Rozellomycota forest 0.0131 0.00974
## 18 Rozellomycota meadow 0.000315 0.0000708
## 19 Rozellomycota organic 0.000163 0.0000405
## 20 Rozellomycota conventional 0.00228 0.000714
x_df_nf <- subset(x_df, sample_type!="forest")
x_df_nf$depth <- as.factor(x_df_nf$depth)
x_df_nf$year <- "2019"
x <- x_df_nf %>%
group_by(year, OTU) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
arrange(desc(mean)) %>%
slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups: year [1]
## year OTU mean se
## <chr> <chr> <dbl> <dbl>
## 1 2019 Ascomycota 0.692 0.0151
## 2 2019 Basidiomycota 0.199 0.0134
## 3 2019 Mortierellomycota 0.0793 0.00956
## 4 2019 Glomeromycota 0.0264 0.00683
## 5 2019 Chytridiomycota 0.00209 0.000362
taxa <- y$OTU
x <- x_df_nf %>%
filter(OTU %in% taxa) %>% group_by(OTU, depth) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 25 × 4
## # Groups: OTU [5]
## OTU depth mean se
## <chr> <fct> <dbl> <dbl>
## 1 Ascomycota 0...10 0.702 0.0227
## 2 Ascomycota 10...20 0.682 0.0190
## 3 Ascomycota 20...30 0.696 0.0207
## 4 Ascomycota 30...40 0.627 0.0486
## 5 Ascomycota 40... 0.752 0.0431
## 6 Basidiomycota 0...10 0.265 0.0198
## 7 Basidiomycota 10...20 0.283 0.0188
## 8 Basidiomycota 20...30 0.196 0.0248
## 9 Basidiomycota 30...40 0.114 0.0320
## 10 Basidiomycota 40... 0.136 0.0364
## # ℹ 15 more rows
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
write.csv2(x, file = "5_Phyla_in_soil_layers_WITHOUT_forest_mean.csv")
library(car)
library("rcompanion")
library("multcompView")
taxa <- y$OTU
for (i in taxa) {
df <- subset(x_df_nf, OTU==i)
# Using leveneTest()
print(i)
result = leveneTest(Abundance ~ depth, df)
# print the result
print(result)
# perform the Kruskal test
k <- kruskal.test(Abundance ~ depth, data = df)
print(k)
wilcox.res <- pairwise.wilcox.test(df$Abundance, df$depth,
p.adjust.method = "BH")
wilcox.res <- wilcox.res[["p.value"]]
print(wilcox.res)
mc = fullPTable(wilcox.res)
mc <- multcompLetters(mc)
mc <- mc[["Letters"]]
print(mc)
}
## [1] "Ascomycota"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 7.2906 2.73e-05 ***
## 120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 4.2424, df = 4, p-value = 0.3742
##
## 0...10 10...20 20...30 30...40
## 10...20 0.6802454 NA NA NA
## 20...30 0.8626083 0.6802454 NA NA
## 30...40 0.6802454 0.8626083 0.7375178 NA
## 40... 0.6574517 0.6574517 0.6574517 0.6212054
## 0...10 10...20 20...30 30...40 40...
## "a" "a" "a" "a" "a"
## [1] "Basidiomycota"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 1.0875 0.3659
## 120
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 38.942, df = 4, p-value = 7.161e-08
##
## 0...10 10...20 20...30 30...40
## 10...20 6.119242e-01 NA NA NA
## 20...30 3.091939e-02 4.118506e-03 NA NA
## 30...40 2.260766e-06 2.260766e-06 0.002588052 NA
## 40... 1.622194e-03 5.646255e-04 0.025631670 0.6721398
## 0...10 10...20 20...30 30...40 40...
## "a" "a" "b" "c" "c"
## [1] "Mortierellomycota"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 5.562 0.0003841 ***
## 120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 39.225, df = 4, p-value = 6.259e-08
##
## 0...10 10...20 20...30 30...40
## 10...20 8.866322e-02 NA NA NA
## 20...30 3.972787e-05 3.234148e-04 NA NA
## 30...40 1.302516e-06 1.302516e-06 0.02136213 NA
## 40... 8.777488e-01 7.468220e-01 0.04799830 0.0008671149
## 0...10 10...20 20...30 30...40 40...
## "a" "a" "b" "c" "a"
## [1] "Glomeromycota"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 4.9745 0.0009581 ***
## 120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 23.482, df = 4, p-value = 0.0001014
##
## 0...10 10...20 20...30 30...40
## 10...20 9.666451e-03 NA NA NA
## 20...30 3.425034e-05 0.009666451 NA NA
## 30...40 8.343651e-04 0.003236620 0.04959274 NA
## 40... 5.593752e-01 0.510709727 0.33419998 0.1147993
## 0...10 10...20 20...30 30...40 40...
## "a" "b" "c" "d" "abcd"
## [1] "Chytridiomycota"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 4.0054 0.004377 **
## 120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 84.286, df = 4, p-value < 2.2e-16
##
## 0...10 10...20 20...30 30...40
## 10...20 5.507318e-01 NA NA NA
## 20...30 3.796944e-01 2.030024e-01 NA NA
## 30...40 1.071611e-08 1.071611e-08 1.991630e-07 NA
## 40... 6.881199e-10 6.881199e-10 7.192628e-09 0.1413528
## 0...10 10...20 20...30 30...40 40...
## "a" "a" "a" "b" "b"
taxa <- "Glomeromycota"
# Construct a data.frame with the selected taxonomic group
df <- filter(x_df_nf, OTU == taxa & depth_numerical > 40)
#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ sample_type, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 1.1016 0.35
## 22
x <- df %>%
group_by(sample_type) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 3 × 3
## sample_type mean se
## <fct> <dbl> <dbl>
## 1 meadow 0.0779 0.0511
## 2 organic 0.0404 0.0179
## 3 conventional 0.0165 0.0139
# one-way ANOVA
res.aov <- aov(Abundance ~ sample_type, data = df)
# Summary of the analysis
print(summary(res.aov))
## Df Sum Sq Mean Sq F value Pr(>F)
## sample_type 2 0.0161 0.008049 0.992 0.387
## Residuals 22 0.1785 0.008113
x_df_nf <- subset(x_df, sample_type=="forest")
x_df_nf$depth <- as.factor(x_df_nf$depth)
x_df_nf$year <- "2019"
x <- x_df_nf %>%
group_by(year, OTU) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
arrange(desc(mean)) %>%
slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups: year [1]
## year OTU mean se
## <chr> <chr> <dbl> <dbl>
## 1 2019 Basidiomycota 0.533 0.0555
## 2 2019 Ascomycota 0.402 0.0486
## 3 2019 Mortierellomycota 0.0470 0.0158
## 4 2019 Rozellomycota 0.0131 0.00974
## 5 2019 Glomeromycota 0.00368 0.00177
taxa <- y$OTU
x <- x_df_nf %>%
filter(OTU %in% taxa) %>% group_by(OTU, depth) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 25 × 4
## # Groups: OTU [5]
## OTU depth mean se
## <chr> <fct> <dbl> <dbl>
## 1 Ascomycota 0...10 0.478 0.0839
## 2 Ascomycota 10...20 0.516 0.0422
## 3 Ascomycota 20...30 0.274 0.160
## 4 Ascomycota 30...40 0.407 0.129
## 5 Ascomycota 40... 0.336 0.109
## 6 Basidiomycota 0...10 0.449 0.121
## 7 Basidiomycota 10...20 0.457 0.0382
## 8 Basidiomycota 20...30 0.597 0.213
## 9 Basidiomycota 30...40 0.510 0.149
## 10 Basidiomycota 40... 0.649 0.0941
## # ℹ 15 more rows
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
write.csv2(x, file = "5_Phyla_in_soil_layers_ONLY_forest_mean.csv")
taxa <- "Glomeromycota"
# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)
#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ depth, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 3.1178 0.06588 .
## 10
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# perform the Kruskal test
kruskal.test(Abundance ~ depth, data = df)
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 7.3861, df = 4, p-value = 0.1168
ps_RA_x <- aggregate_rare(ps_RA, level = "class", detection = 0, prevalence = 0)
ps_RA_x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 68 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 35 sample variables ]
## tax_table() Taxonomy Table: [ 68 taxa by 1 taxonomic ranks ]
# 68 taxa and 140 samples
#create data table
x_df <- psmelt(ps_RA_x)
x_df$year <- "2019"
x <- x_df %>%
group_by(year, OTU) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
arrange(desc(mean)) %>%
slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups: year [1]
## year OTU mean se
## <chr> <chr> <dbl> <dbl>
## 1 2019 Leotiomycetes 0.282 0.0200
## 2 2019 Sordariomycetes 0.169 0.0124
## 3 2019 Dothideomycetes 0.127 0.0128
## 4 2019 Tremellomycetes 0.126 0.0105
## 5 2019 Agaricomycetes 0.0884 0.0145
taxa <- y$OTU
x <- x_df %>%
filter(OTU %in% taxa) %>% group_by(OTU, sample_type) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 20 × 4
## # Groups: OTU [5]
## OTU sample_type mean se
## <chr> <fct> <dbl> <dbl>
## 1 Agaricomycetes forest 0.448 0.0653
## 2 Agaricomycetes meadow 0.0583 0.00947
## 3 Agaricomycetes organic 0.0592 0.0217
## 4 Agaricomycetes conventional 0.0215 0.00727
## 5 Dothideomycetes forest 0.0501 0.0130
## 6 Dothideomycetes meadow 0.189 0.0351
## 7 Dothideomycetes organic 0.109 0.0178
## 8 Dothideomycetes conventional 0.115 0.0151
## 9 Leotiomycetes forest 0.202 0.0354
## 10 Leotiomycetes meadow 0.301 0.0367
## 11 Leotiomycetes organic 0.270 0.0398
## 12 Leotiomycetes conventional 0.302 0.0375
## 13 Sordariomycetes forest 0.0309 0.00929
## 14 Sordariomycetes meadow 0.0874 0.0157
## 15 Sordariomycetes organic 0.239 0.0229
## 16 Sordariomycetes conventional 0.225 0.0210
## 17 Tremellomycetes forest 0.0757 0.0285
## 18 Tremellomycetes meadow 0.139 0.0215
## 19 Tremellomycetes organic 0.148 0.0209
## 20 Tremellomycetes conventional 0.110 0.0157
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
write.csv2(x, file = "5_Class_in_soil_layers_All_Management.csv")
taxa <- y$OTU
for (i in taxa) {
# Construct a data.frame with the selected taxonomic group
df <- filter(x_df, OTU == i)
print(i)
# Using leveneTest()
result = leveneTest(Abundance ~ sample_type, df)
# print the result
print(result)
# perform the Kruskal test
k <- kruskal.test(Abundance ~ sample_type, data = df)
print(k)
w <- pairwise.wilcox.test(df$Abundance, df$sample_type,
p.adjust.method = "BH")
print(w)
}
## [1] "Leotiomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 0.6472 0.586
## 136
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 3.8991, df = 3, p-value = 0.2726
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.33 - -
## organic 0.61 0.33 -
## conventional 0.33 0.61 0.33
##
## P value adjustment method: BH
## [1] "Sordariomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 8.0545 5.591e-05 ***
## 136
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 47.221, df = 3, p-value = 3.12e-10
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.0057 - -
## organic 4.9e-07 1.9e-06 -
## conventional 4.9e-07 1.9e-06 0.6393
##
## P value adjustment method: BH
## [1] "Dothideomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 5.7442 0.0009917 ***
## 136
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 9.5392, df = 3, p-value = 0.02292
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.024 - -
## organic 0.134 0.134 -
## conventional 0.024 0.415 0.560
##
## P value adjustment method: BH
## [1] "Tremellomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.4793 0.223
## 136
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 5.8864, df = 3, p-value = 0.1173
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 0.20 - -
## organic 0.20 0.71 -
## conventional 0.33 0.33 0.23
##
## P value adjustment method: BH
## [1] "Agaricomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 13.41 1.018e-07 ***
## 136
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by sample_type
## Kruskal-Wallis chi-squared = 44.002, df = 3, p-value = 1.508e-09
##
##
## Pairwise comparisons using Wilcoxon rank sum exact test
##
## data: df$Abundance and df$sample_type
##
## forest meadow organic
## meadow 4.7e-08 - -
## organic 4.7e-08 0.05845 -
## conventional 4.7e-08 0.00015 0.52326
##
## P value adjustment method: BH
x <- x_df %>%
filter(OTU %in% taxa) %>% group_by(OTU, sample_type) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 20 × 4
## # Groups: OTU [5]
## OTU sample_type mean se
## <chr> <fct> <dbl> <dbl>
## 1 Agaricomycetes forest 0.448 0.0653
## 2 Agaricomycetes meadow 0.0583 0.00947
## 3 Agaricomycetes organic 0.0592 0.0217
## 4 Agaricomycetes conventional 0.0215 0.00727
## 5 Dothideomycetes forest 0.0501 0.0130
## 6 Dothideomycetes meadow 0.189 0.0351
## 7 Dothideomycetes organic 0.109 0.0178
## 8 Dothideomycetes conventional 0.115 0.0151
## 9 Leotiomycetes forest 0.202 0.0354
## 10 Leotiomycetes meadow 0.301 0.0367
## 11 Leotiomycetes organic 0.270 0.0398
## 12 Leotiomycetes conventional 0.302 0.0375
## 13 Sordariomycetes forest 0.0309 0.00929
## 14 Sordariomycetes meadow 0.0874 0.0157
## 15 Sordariomycetes organic 0.239 0.0229
## 16 Sordariomycetes conventional 0.225 0.0210
## 17 Tremellomycetes forest 0.0757 0.0285
## 18 Tremellomycetes meadow 0.139 0.0215
## 19 Tremellomycetes organic 0.148 0.0209
## 20 Tremellomycetes conventional 0.110 0.0157
x_df_nf <- subset(x_df, sample_type!="forest")
x_df_nf$depth <- as.factor(x_df_nf$depth)
x_df_nf$year <- "2019"
x <- x_df_nf %>%
group_by(year, OTU) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
arrange(desc(mean)) %>%
slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups: year [1]
## year OTU mean se
## <chr> <chr> <dbl> <dbl>
## 1 2019 Leotiomycetes 0.291 0.0218
## 2 2019 Sordariomycetes 0.185 0.0131
## 3 2019 Dothideomycetes 0.137 0.0140
## 4 2019 Tremellomycetes 0.132 0.0111
## 5 2019 Mortierellomycetes 0.0793 0.00956
taxa <- y$OTU
x <- x_df_nf %>%
filter(OTU %in% taxa) %>% group_by(OTU, depth) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 25 × 4
## # Groups: OTU [5]
## OTU depth mean se
## <chr> <fct> <dbl> <dbl>
## 1 Dothideomycetes 0...10 0.296 0.0438
## 2 Dothideomycetes 10...20 0.193 0.0160
## 3 Dothideomycetes 20...30 0.0870 0.0164
## 4 Dothideomycetes 30...40 0.0515 0.0207
## 5 Dothideomycetes 40... 0.0548 0.0172
## 6 Leotiomycetes 0...10 0.115 0.0183
## 7 Leotiomycetes 10...20 0.150 0.0126
## 8 Leotiomycetes 20...30 0.310 0.0394
## 9 Leotiomycetes 30...40 0.422 0.0482
## 10 Leotiomycetes 40... 0.460 0.0626
## # ℹ 15 more rows
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
write.csv2(x, file = "5_Classes_in_soil_layers_WITHOUT_forest_mean.csv")
taxa <- y$OTU
for (i in taxa) {
# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==i)
print(i)
# Using leveneTest()
result = leveneTest(Abundance ~ depth, df)
# print the result
print(result)
# perform the Kruskal test
k <- kruskal.test(Abundance ~ depth, data = df)
print(k)
wilcox.res <- pairwise.wilcox.test(df$Abundance, df$depth,
p.adjust.method = "BH")
wilcox.res <- wilcox.res[["p.value"]]
print(wilcox.res)
mc = fullPTable(wilcox.res)
mc <- multcompLetters(mc)
mc <- mc[["Letters"]]
print(mc)
}
## [1] "Leotiomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 14.875 6.712e-10 ***
## 120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 43.951, df = 4, p-value = 6.568e-09
##
## 0...10 10...20 20...30 30...40
## 10...20 2.170595e-02 NA NA NA
## 20...30 1.819319e-05 3.105480e-03 NA NA
## 30...40 1.819319e-05 2.393976e-05 0.08486076 NA
## 40... 1.819319e-05 4.506015e-05 0.11043685 0.8626083
## 0...10 10...20 20...30 30...40 40...
## "a" "b" "c" "c" "c"
## [1] "Sordariomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 2.6173 0.03845 *
## 120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 25.086, df = 4, p-value = 4.835e-05
##
## 0...10 10...20 20...30 30...40
## 10...20 3.753357e-01 NA NA NA
## 20...30 3.753357e-01 2.425450e-01 NA NA
## 30...40 8.223114e-05 8.223114e-05 0.0002982584 NA
## 40... 2.183687e-01 1.084402e-01 0.3753357254 0.1072279
## 0...10 10...20 20...30 30...40 40...
## "a" "a" "a" "b" "ab"
## [1] "Dothideomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 5.7029 0.0003088 ***
## 120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 64.123, df = 4, p-value = 3.937e-13
##
## 0...10 10...20 20...30 30...40
## 10...20 3.510510e-01 NA NA NA
## 20...30 9.043065e-07 2.433622e-06 NA NA
## 30...40 4.846508e-08 5.869563e-08 0.0005249541 NA
## 40... 1.060943e-07 3.675922e-07 0.0151251757 0.5900142
## 0...10 10...20 20...30 30...40 40...
## "a" "a" "b" "c" "c"
## [1] "Tremellomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 4.5628 0.001825 **
## 120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 72.482, df = 4, p-value = 6.787e-15
##
## 0...10 10...20 20...30 30...40
## 10...20 3.448406e-01 NA NA NA
## 20...30 3.257171e-02 1.715879e-03 NA NA
## 30...40 1.503038e-12 3.164291e-13 6.082530e-06 NA
## 40... 1.211627e-08 4.250065e-09 2.741038e-05 0.04327362
## 0...10 10...20 20...30 30...40 40...
## "a" "a" "b" "c" "d"
## [1] "Mortierellomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 5.562 0.0003841 ***
## 120
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 39.225, df = 4, p-value = 6.259e-08
##
## 0...10 10...20 20...30 30...40
## 10...20 8.866322e-02 NA NA NA
## 20...30 3.972787e-05 3.234148e-04 NA NA
## 30...40 1.302516e-06 1.302516e-06 0.02136213 NA
## 40... 8.777488e-01 7.468220e-01 0.04799830 0.0008671149
## 0...10 10...20 20...30 30...40 40...
## "a" "a" "b" "c" "a"
x_df_nf <- subset(x_df, sample_type=="forest")
x_df_nf$depth <- as.factor(x_df_nf$depth)
x_df_nf$year <- "2019"
x <- x_df_nf %>%
group_by(year, OTU) %>%
summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
y <- x %>%
arrange(desc(mean)) %>%
slice(1:5)
print(y)
## # A tibble: 5 × 4
## # Groups: year [1]
## year OTU mean se
## <chr> <chr> <dbl> <dbl>
## 1 2019 Agaricomycetes 0.448 0.0653
## 2 2019 Leotiomycetes 0.202 0.0354
## 3 2019 Tremellomycetes 0.0757 0.0285
## 4 2019 Pezizomycetes 0.0520 0.0233
## 5 2019 Dothideomycetes 0.0501 0.0130
taxa <- y$OTU
x <- x_df_nf %>%
filter(OTU %in% taxa) %>% group_by(OTU, depth) %>% summarise(mean = mean(Abundance, na.rm = TRUE), se = (sd(Abundance, na.rm = TRUE)/sqrt(length((Abundance)))))
print(x)
## # A tibble: 25 × 4
## # Groups: OTU [5]
## OTU depth mean se
## <chr> <fct> <dbl> <dbl>
## 1 Agaricomycetes 0...10 0.217 0.0933
## 2 Agaricomycetes 10...20 0.395 0.0287
## 3 Agaricomycetes 20...30 0.539 0.260
## 4 Agaricomycetes 30...40 0.507 0.151
## 5 Agaricomycetes 40... 0.578 0.0838
## 6 Dothideomycetes 0...10 0.0996 0.0324
## 7 Dothideomycetes 10...20 0.0913 0.00381
## 8 Dothideomycetes 20...30 0.0418 0.0285
## 9 Dothideomycetes 30...40 0.0121 0.0120
## 10 Dothideomycetes 40... 0.00578 0.00573
## # ℹ 15 more rows
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
write.csv2(x, file = "5_Classes_in_soil_layers_ONLY_forest_mean.csv")
taxa <- y$OTU
for (i in taxa) {
# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==i)
print(i)
# Using leveneTest()
result = leveneTest(Abundance ~ depth, df)
# print the result
print(result)
# perform the Kruskal test
k <- kruskal.test(Abundance ~ depth, data = df)
print(k)
wilcox.res <- pairwise.wilcox.test(df$Abundance, df$depth,
p.adjust.method = "BH")
wilcox.res <- wilcox.res[["p.value"]]
print(wilcox.res)
mc = fullPTable(wilcox.res)
mc <- multcompLetters(mc)
mc <- mc[["Letters"]]
print(mc)
}
## [1] "Agaricomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 0.8107 0.5461
## 10
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 4.7, df = 4, p-value = 0.3195
##
## 0...10 10...20 20...30 30...40
## 10...20 1.0000000 NA NA NA
## 20...30 1.0000000 1.0 NA NA
## 30...40 0.6666667 1.0 1 NA
## 40... 0.5000000 0.5 1 1
## 0...10 10...20 20...30 30...40 40...
## "a" "a" "a" "a" "a"
## [1] "Leotiomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 0.2981 0.8726
## 10
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 2.0667, df = 4, p-value = 0.7235
##
## 0...10 10...20 20...30 30...40
## 10...20 0.7777778 NA NA NA
## 20...30 0.7777778 0.7777778 NA NA
## 30...40 0.7777778 0.7777778 0.7777778 NA
## 40... 0.7777778 0.7777778 1.0000000 0.7777778
## 0...10 10...20 20...30 30...40 40...
## "a" "a" "a" "a" "a"
## [1] "Tremellomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 1.3902 0.3055
## 10
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 7.1667, df = 4, p-value = 0.1273
##
## 0...10 10...20 20...30 30...40
## 10...20 0.4000000 NA NA NA
## 20...30 0.4000000 0.7777778 NA NA
## 30...40 0.4000000 0.4000000 0.5714286 NA
## 40... 0.5714286 0.7777778 1.0000000 0.4
## 0...10 10...20 20...30 30...40 40...
## "a" "a" "a" "a" "a"
## [1] "Pezizomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 1.392 0.3049
## 10
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 8.1, df = 4, p-value = 0.08798
##
## 0...10 10...20 20...30 30...40
## 10...20 0.4000000 NA NA NA
## 20...30 0.2500000 0.5714286 NA NA
## 30...40 0.2500000 0.2500000 0.25 NA
## 40... 0.7777778 0.7777778 1.00 0.5714286
## 0...10 10...20 20...30 30...40 40...
## "a" "a" "a" "a" "a"
## [1] "Dothideomycetes"
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 0.5163 0.7259
## 10
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by depth
## Kruskal-Wallis chi-squared = 8.5667, df = 4, p-value = 0.07289
##
## 0...10 10...20 20...30 30...40
## 10...20 0.7777778 NA NA NA
## 20...30 0.4000000 0.5000000 NA NA
## 30...40 0.4000000 0.3333333 0.5 NA
## 40... 0.3333333 0.3333333 0.5 1
## 0...10 10...20 20...30 30...40 40...
## "a" "a" "a" "a" "a"
ps_RA_x <- aggregate_rare(ps_RA, level = "class", detection = 0, prevalence = 0)
ps_RA_x
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 68 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 35 sample variables ]
## tax_table() Taxonomy Table: [ 68 taxa by 1 taxonomic ranks ]
# 68 taxa and 140 samples
#create data table
x_df <- psmelt(ps_RA_x)
This senetence based on looking at the class composition barplot:
“Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Archaeosporomycetes in organic 30-80 cm, Geoglossomycetes in forest 10-20 cm, meadow 10-40 cm and organic 20-80 cm, Microbotryomycetes in conventional 40-80 cm, and Glomeromycetes in meadow 30-80 cm, and Orbiliomycetes.”
Let’s test them separately
x_df_nf <- subset(x_df, sample_type=="forest")
x_df_nf$compare <- NA
x_df_nf$compare[x_df_nf$depth=="30...40"]<-"yes"
x_df_nf$compare[is.na(x_df_nf$compare)]<-"no"
taxa <- "Pezizomycetes"
# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)
#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ compare, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 2.6256 0.1291
## 13
# first one-way ANOVA
res.aov <- aov(Abundance ~ compare, data = df)
# Summary of the analysis
print(summary(res.aov))
## Df Sum Sq Mean Sq F value Pr(>F)
## compare 1 0.05219 0.05219 10.96 0.00564 **
## Residuals 13 0.06193 0.00476
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
This sentence remains to be tested:
“Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Archaeosporomycetes in organic 30-80 cm, Geoglossomycetes in forest 10-20 cm, meadow 10-40 cm and organic 20-80 cm, Microbotryomycetes in conventional 40-80 cm, and Glomeromycetes in meadow 30-80 cm, and Orbiliomycetes.”
x_df_nf <- subset(x_df, sample_type=="organic")
x_df_nf$compare <- NA
x_df_nf$compare[x_df_nf$depth=="30...40"]<-"yes"
x_df_nf$compare[is.na(x_df_nf$compare)]<-"no"
taxa <- "Archaeosporomycetes"
# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)
#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ compare, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 5.1753 0.02864 *
## 38
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# perform the Kruskal test
k <- kruskal.test(Abundance ~ compare, data = df)
print(k)
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by compare
## Kruskal-Wallis chi-squared = 0, df = 1, p-value = 1
Not significant!
This sentence remains to be tested:
“Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Geoglossomycetes in meadow 10-40 cm, Microbotryomycetes in conventional 40-80 cm, and Glomeromycetes in meadow 30-80 cm, and Orbiliomycetes.”
not sig. in:
x_df_nf <- subset(x_df, sample_type=="meadow")
x_df_nf$compare <- NA
x_df_nf$compare[x_df_nf$depth=="20...30"]<-"yes"
x_df_nf$compare[x_df_nf$depth=="30...40"]<-"yes"
x_df_nf$compare[x_df_nf$depth=="10...20"]<-"yes"
x_df_nf$compare[is.na(x_df_nf$compare)]<-"no"
taxa <- "Geoglossomycetes"
# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)
#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ compare, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 21.441 4.181e-05 ***
## 38
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# perform the Kruskal test
k <- kruskal.test(Abundance ~ compare, data = df)
print(k)
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by compare
## Kruskal-Wallis chi-squared = 14.099, df = 1, p-value = 0.0001734
Is sig. in meadow 10-40 cm
“Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Geoglossomycetes meadow 10-40 cm (Kruskal; p = 0.000), Microbotryomycetes in conventional 40-80 cm (Kruskal, P = 0.030), and Glomeromycetes in meadow 30-80 cm, and Orbiliomycetes.”
x_df_nf <- subset(x_df, sample_type=="conventional")
x_df_nf$compare <- NA
x_df_nf$compare[x_df_nf$depth=="40..."]<-"yes"
x_df_nf$compare[is.na(x_df_nf$compare)]<-"no"
taxa <- "Microbotryomycetes"
# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)
#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ compare, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 7.2692 0.009974 **
## 43
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# perform the Kruskal test
k <- kruskal.test(Abundance ~ compare, data = df)
print(k)
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by compare
## Kruskal-Wallis chi-squared = 4.6876, df = 1, p-value = 0.03038
Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Geoglossomycetes meadow 10-40 cm (Kruskal; p = 0.000), Microbotryomycetes in conventional 40-80 cm (Kruskal, P = 0.030), and Glomeromycetes in meadow 30-80 cm, and Orbiliomycetes.
x_df_nf <- subset(x_df, sample_type=="meadow")
x_df_nf$compare <- NA
x_df_nf$compare[x_df_nf$depth=="30...40"]<-"yes"
x_df_nf$compare[x_df_nf$depth=="40..."]<-"yes"
x_df_nf$compare[is.na(x_df_nf$compare)]<-"no"
taxa <- "Glomeromycetes"
# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)
#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ compare, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 7.4061 0.009755 **
## 38
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# perform the Kruskal test
k <- kruskal.test(Abundance ~ compare, data = df)
print(k)
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by compare
## Kruskal-Wallis chi-squared = 0.93404, df = 1, p-value = 0.3338
Not sig.
Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Geoglossomycetes meadow 10-40 cm (Kruskal; p = 0.000), Microbotryomycetes in conventional 40-80 cm (Kruskal, P = 0.030), and Orbiliomycetes in meadow 10-30 cm.
x_df_nf <- subset(x_df, sample_type=="meadow")
x_df_nf$compare <- NA
x_df_nf$compare[x_df_nf$depth=="10...20"]<-"yes"
x_df_nf$compare[x_df_nf$depth=="20...30"]<-"yes"
x_df_nf$compare[is.na(x_df_nf$compare)]<-"no"
taxa <- "Orbiliomycetes"
# Construct a data.frame with the selected taxonomic group
df <- subset(x_df_nf, OTU==taxa)
#library(car)
# Using leveneTest()
result = leveneTest(Abundance ~ compare, df)
# print the result
print(result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 18.614 0.0001099 ***
## 38
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# perform the Kruskal test
k <- kruskal.test(Abundance ~ compare, data = df)
print(k)
##
## Kruskal-Wallis rank sum test
##
## data: Abundance by compare
## Kruskal-Wallis chi-squared = 19.128, df = 1, p-value = 1.222e-05
This sentence remains valid:
“Fungal classes that had mostly low proportion but popped up in specific layers were Pezizomycetes in forest 30-40 cm layer (ANOVA; p = 0.006), Geoglossomycetes meadow 10-40 cm (Kruskal; p = 0.000), Microbotryomycetes in conventional 40-80 cm (Kruskal, P = 0.030), and Orbiliomycetes in meadow 10-30 cm (Kruskal; p = 0.000).”
AMF PERMANOVA will be done at genus level, because the PERMANOVA is used to support the AMF bubble plot in STEP 13, which is done at genus level.
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library("pairwiseAdonis")
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 35 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
ps_GM <- subset_taxa(ps, phylum=="Glomeromycota")
ps_GM
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 263 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 35 sample variables ]
## tax_table() Taxonomy Table: [ 263 taxa by 7 taxonomic ranks ]
# 263 taxa and 140 samples
ps_GM <- aggregate_rare(ps_GM, level = "genus", detection = 0, prevalence = 0)
ps_GM
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 17 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 35 sample variables ]
## tax_table() Taxonomy Table: [ 17 taxa by 1 taxonomic ranks ]
# 17 taxa and 140 samples
# Pick relative abundances (compositional) and sample metadata
ps_RA <- microbiome::transform(ps_GM, "compositional")
otu <- abundances(ps_RA)
meta <- meta(ps)
PERMANOVA cannot handle NAs, so I have to remove samples that do not have any AMF taxa from the analysis
# check how many AMF with dim (although I already know it is 17)
dim(otu)
## [1] 17 140
# If column sum adds up to zero, it means that that sample doesn't have any AMF. check how many zero values samples I have, and remove sample if all (17) are zero
x <- colSums(otu==0)==17
z <- which(x, arr.ind = FALSE, useNames = TRUE)
print(z)
## CG9.1_30to40 CG9.1_40to70 CPO5.1_40to70 CPO5.2_40to70 CR14.1_40to80
## 4 5 20 25 35
## M2_40to60 M3_30to40 M3_40to60 NG2A1_40to70 NG2B3_40to70
## 55 59 60 65 90
## OG10.2_40to70 OG10.3_30to40 OG10.3_40to70 OR13.1_30to40 OR13.1_40to80
## 110 114 115 129 130
Samples_toRemove <- c("CG9.1_30to40", "CG9.1_40to70", "CPO5.1_40to70", "CPO5.2_40to70", "CR14.1_40to80", "M2_40to60", "M3_30to40", "M3_40to60", "NG2A1_40to70", "NG2B3_40to70", "OG10.2_40to70", "OG10.3_30to40", "OG10.3_40to70", "OR13.1_30to40", "OR13.1_40to80")
ps_GM_pruned <- subset_samples(ps_RA, !(sampleID %in% Samples_toRemove))
ps_GM_pruned
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 17 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 35 sample variables ]
## tax_table() Taxonomy Table: [ 17 taxa by 1 taxonomic ranks ]
# 17 taxa and 125 samples remained in the dataset
# lets make sure all missing taxa are removed
prev0 = apply(X = otu_table(ps_GM_pruned),
MARGIN = ifelse(taxa_are_rows(ps_GM_pruned), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_GM_pruned = prune_taxa((prev0 > 0), ps_GM_pruned)
ps_GM_pruned
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 17 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 35 sample variables ]
## tax_table() Taxonomy Table: [ 17 taxa by 1 taxonomic ranks ]
# 17 taxa and 125 samples
# Pick relative abundances (compositional) and sample metadata
ps_RA <- ps_GM_pruned
otu <- abundances(ps_RA)
meta <- meta(ps_RA)
# note! the distance matrix is now at genus level!
ps_RA_bray <- phyloseq::distance(ps_RA, method = "bray")
GP.ord <- ordinate(ps_RA, "PCoA", "bray")
p1 = plot_ordination(ps_RA, GP.ord, type="samples", color="sample_type", shape = "depth")
print(p1)
# first with just soil type and strata option
a <- adonis2(formula = ps_RA_bray~ sample_type, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$depth)
print(a)
## Permutation test for adonis under reduced model
## Marginal effects of terms
## Blocks: strata
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ sample_type, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$depth)
## Df SumOfSqs R2 F Pr(>F)
## sample_type 3 4.029 0.11393 5.186 1e-04 ***
## Residual 121 31.336 0.88607
## Total 124 35.366 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# then with just depth and strata option
a <- adonis2(formula = ps_RA_bray~ depth, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$sample_type)
print(a)
## Permutation test for adonis under reduced model
## Marginal effects of terms
## Blocks: strata
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ depth, data = meta, permutations = 9999, method = "bray", by = "margin", strata = meta$sample_type)
## Df SumOfSqs R2 F Pr(>F)
## depth 4 2.978 0.08421 2.7584 2e-04 ***
## Residual 120 32.388 0.91579
## Total 124 35.366 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The AMF communities at genus level differed more between management types (PERMANOVA; R2 = 0.114; p = 0.000) than between soil layers (PERMANOVA; R2 = 0.084; p = 0.000).
# For the full model it matters which "by" option we choose. When by="terms" will assess significance for each term sequentially from first to last, so that the order of terms matter. We will use this, because with sequential analysis we will get R2 values that sum up to 1, and will also get the significance and R2 values for each interaction term separately rather than for the interaction alone
# because sample type had larger R2 I will put it first in the model
# note interaction term was not significant!
final <- adonis2(formula = ps_RA_bray ~ sample_type + depth, data = meta, permutations = 9999, method = "bray", by = "terms")
print(final)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 9999
##
## adonis2(formula = ps_RA_bray ~ sample_type + depth, data = meta, permutations = 9999, method = "bray", by = "terms")
## Df SumOfSqs R2 F Pr(>F)
## sample_type 3 4.029 0.11393 5.5646 1e-04 ***
## depth 4 3.097 0.08758 3.2082 2e-04 ***
## Residual 117 28.239 0.79849
## Total 124 35.366 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_bray,factors=meta$sample_type)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 conventional vs forest 1 1.1125799 3.748075 0.06973412 0.009 0.054
## 2 conventional vs meadow 1 1.2654046 4.477131 0.05563234 0.003 0.018
## 3 conventional vs organic 1 0.6465733 2.776924 0.03664604 0.022 0.132
## 4 forest vs meadow 1 0.6459536 2.162313 0.04310632 0.075 0.450
## 5 forest vs organic 1 1.6460377 7.515467 0.14310959 0.001 0.006
## 6 meadow vs organic 1 2.5882231 11.141036 0.13563301 0.001 0.006
## sig
## 1
## 2 .
## 3
## 4
## 5 *
## 6 *
x <- as.data.frame(pair.mod)
print(x)
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 conventional vs forest 1 1.1125799 3.748075 0.06973412 0.009 0.054
## 2 conventional vs meadow 1 1.2654046 4.477131 0.05563234 0.003 0.018
## 3 conventional vs organic 1 0.6465733 2.776924 0.03664604 0.022 0.132
## 4 forest vs meadow 1 0.6459536 2.162313 0.04310632 0.075 0.450
## 5 forest vs organic 1 1.6460377 7.515467 0.14310959 0.001 0.006
## 6 meadow vs organic 1 2.5882231 11.141036 0.13563301 0.001 0.006
## sig
## 1
## 2 .
## 3
## 4
## 5 *
## 6 *
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
write.csv2(x, file = "AMF_Pairwise_PERMANOVA_by_MANAGEMENT.csv")
set.seed(777)
pair.mod<-pairwise.adonis(ps_RA_bray,factors=meta$depth)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 0...10 vs 10...20 1 0.08290182 0.3780362 0.006952001 0.844 1.00
## 2 0...10 vs 20...30 1 0.34215045 1.3034979 0.023569900 0.236 1.00
## 3 0...10 vs 30...40 1 1.17951804 4.3871337 0.080664918 0.006 0.06
## 4 0...10 vs 40... 1 1.18589735 4.2227763 0.089422448 0.005 0.05
## 5 10...20 vs 20...30 1 0.30058651 1.2249127 0.022180438 0.279 1.00
## 6 10...20 vs 30...40 1 1.30971371 5.2305082 0.094703242 0.003 0.03
## 7 10...20 vs 40... 1 1.55697160 6.0029207 0.122501284 0.002 0.02
## 8 20...30 vs 30...40 1 0.53352935 1.7961256 0.034676834 0.134 1.00
## 9 20...30 vs 40... 1 0.85776912 2.7351625 0.059804369 0.031 0.31
## 10 30...40 vs 40... 1 0.30239257 0.9246894 0.023160842 0.453 1.00
## sig
## 1
## 2
## 3
## 4 .
## 5
## 6 .
## 7 .
## 8
## 9
## 10
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
meta$depth <- as.factor(meta$depth)
for (i in meta$depth) {
# subset samples
ps_RA_subset <- subset_samples(ps_RA, sample_type != "forest")
ps_RA_subset <- subset_samples(ps_RA_subset, depth == i)
# lets make sure all missing taxa are removed
prev0 = apply(X = otu_table(ps_RA_subset),
MARGIN = ifelse(taxa_are_rows(ps_RA_subset), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
# Execute prevalence filter, using `prune_taxa()` function
ps_RA_subset = prune_taxa((prev0 > 0), ps_RA_subset)
ps_RA_subset
# 12 taxa and 25 samples
meta_subset <- meta(ps_RA_subset)
ps_RA_subset_bray <- phyloseq::distance(ps_RA_subset, method = "bray")
set.seed(777)
x <- as.data.frame(pairwise.adonis(ps_RA_subset_bray,factors=meta_subset$sample_type))
x
}
library(vegan)
library(goeveg)
library(metagMisc)
library(phyloseq)
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
library(car)
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 35 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
# add sample_type_depth# add soil_type_depth
sample_data(ps)$soil_type_depth <- paste(sample_data(ps)$sample_type, "_", sample_data(ps)$depth)
meta <- meta(ps)
Note! first transform to RA and then filter AMF
# Relative abundance
ps_RA <- microbiome::transform(ps, 'compositional')
ps_RA
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 36 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
# keep only Glomeromycota
ps2_std_G <- subset_taxa(ps_RA, phylum=="Glomeromycota")
ps2_std_G
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 263 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 36 sample variables ]
## tax_table() Taxonomy Table: [ 263 taxa by 7 taxonomic ranks ]
# 263 taxa and 140 samples
For the bubble plot, we want to get average values based on soil_type_depth
library("metagMisc")
ps2_std_G <- phyloseq_average(
ps2_std_G,
avg_type = "arithmetic",
group = "soil_type_depth",
drop_group_zero = FALSE,
verbose = FALSE,
progress = NULL
)
ps2_std_G
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 263 taxa and 20 samples ]
## tax_table() Taxonomy Table: [ 263 taxa by 7 taxonomic ranks ]
# aggregate
ps2_std_G <- aggregate_rare(ps2_std_G, level = 'genus', detection = 0, prevalence = 0, include.lowest = TRUE)
ps2_std_G
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 17 taxa and 20 samples ]
## tax_table() Taxonomy Table: [ 17 taxa by 1 taxonomic ranks ]
tax_table_G <- as.data.frame(as.matrix(tax_table(ps2_std_G)))
OTU_genus_table_G <- as.data.frame(as.matrix(otu_table(ps2_std_G)))
# make OTUs into columns
OTU_genus_table_G <- t(OTU_genus_table_G)
OTU_genus_table_G <- as.data.frame(OTU_genus_table_G)
# add total of phylum glomeracea
OTU_genus_table_G <- cbind(
OTU_genus_table_G,
total = rowSums(OTU_genus_table_G
))
# Change your data structure from a wide format to a long format. Put any variables that are not OTUs/genus, into the id parameter
#convert data frame from a wide format to a long format
pc <- tibble::rownames_to_column(OTU_genus_table_G, "Sample")
pcm = melt(pc, id = c("Sample"))
meta_x <- meta[!duplicated(meta$soil_type_depth),]
# Vector of column names to select
columns_to_keep <- c("depth", "sample_type", "soil_type_depth")
# Using base R to select columns
meta_x <- meta_x[, columns_to_keep]
rownames(meta_x) <- NULL
# add metadata
pcm <- left_join(pcm, meta_x, by=c("Sample"="soil_type_depth"))
# change the column name
colnames(pcm)[1] <- "soil_type_depth"
pcm$soil_type_depth <- as.factor(pcm$soil_type_depth)
# Change the order of samples
pcm$soil_type_depth <- factor(pcm$soil_type_depth,levels=c("forest _ 0...10", "forest _ 10...20", "forest _ 20...30", "forest _ 30...40", "forest _ 40...", "meadow _ 0...10", "meadow _ 10...20", "meadow _ 20...30", "meadow _ 30...40", "meadow _ 40...", "organic _ 0...10", "organic _ 10...20", "organic _ 20...30", "organic _ 30...40", "organic _ 40...", "conventional _ 0...10", "conventional _ 10...20", "conventional _ 20...30", "conventional _ 30...40", "conventional _ 40..."))
levels(pcm$soil_type_depth)
## [1] "forest _ 0...10" "forest _ 10...20" "forest _ 20...30"
## [4] "forest _ 30...40" "forest _ 40..." "meadow _ 0...10"
## [7] "meadow _ 10...20" "meadow _ 20...30" "meadow _ 30...40"
## [10] "meadow _ 40..." "organic _ 0...10" "organic _ 10...20"
## [13] "organic _ 20...30" "organic _ 30...40" "organic _ 40..."
## [16] "conventional _ 0...10" "conventional _ 10...20" "conventional _ 20...30"
## [19] "conventional _ 30...40" "conventional _ 40..."
# add the other taxonomic level annotations
TAX <- as.data.frame(as.matrix(tax_table(ps)))
row.names(TAX) <- NULL
TAX <- TAX[, 1:6]
TAX <- filter(TAX, phylum == "Glomeromycota")
TAX <- TAX[!duplicated(TAX$genus),]
pcm2 <- left_join(pcm, TAX, by = c("variable" = "genus"))
pcm2$variable[pcm2$variable == "Ambisporaceae_unclassified"] <- "Ambisporaceae (f)"
pcm2$variable[pcm2$variable == "Archaeospora"] <- "Archaeospora (g)"
pcm2$variable[pcm2$variable == "Archaeosporaceae_unclassified"] <- "Archaeosporaceae (f)"
pcm2$variable[pcm2$variable == "Archaeosporales_unclassified"] <- "Archaeosporales (o)"
pcm2$variable[pcm2$variable == "Diversispora"] <- "Diversispora (g)"
pcm2$variable[pcm2$variable == "Claroideoglomus"] <- "Claroideoglomus (g)"
pcm2$variable[pcm2$variable == "Entrophospora"] <- "Entrophospora (g)"
pcm2$variable[pcm2$variable == "Claroideoglomeraceae_unclassified"] <- "Claroideoglomeraceae (f)"
pcm2$variable[pcm2$variable == "Dominikia"] <- "Dominikia (g)"
pcm2$variable[pcm2$variable == "Funneliformis"] <- "Funneliformis (g)"
pcm2$variable[pcm2$variable == "Glomeraceae_unclassified"] <- "Glomeraceae (f)"
pcm2$variable[pcm2$variable == "Glomus"] <- "Glomus (g)"
pcm2$variable[pcm2$variable == "Microdominikia"] <- "Microdominikia (g)"
pcm2$variable[pcm2$variable == "Rhizophagus"] <- "Rhizophagus (g)"
pcm2$variable[pcm2$variable == "Glomeromycota_unclassified"] <- "Glomeromycota (p)"
pcm2$variable[pcm2$variable == "Paraglomus"] <- "Paraglomus (g)"
pcm2$variable[pcm2$variable == "Paraglomerales_unclassified"] <- "Paraglomerales (o)"
pcm2$variable[pcm2$variable == "total"] <- "total"
# add total to family
pcm2$family <- pcm2$family %>% replace_na('total')
pcm2$family[pcm2$family=="Ambisporaceae"] <- "Ambisporaceae (f)"
pcm2$family[pcm2$family=="Archaeosporaceae"] <- "Archaeosporaceae (f)"
pcm2$family[pcm2$family=="Archaeosporales_unclassified"] <- "Archaeosporales (o)"
pcm2$family[pcm2$family=="Diversisporaceae"] <- "Diversisporaceae (f)"
pcm2$family[pcm2$family=="Entrophosporaceae"] <- "Entrophosporaceae (f)"
pcm2$family[pcm2$family=="Claroideoglomeraceae"] <- "Claroideoglomeraceae (f)"
pcm2$family[pcm2$family=="Glomeraceae"] <- "Glomeraceae (f)"
pcm2$family[pcm2$family=="Glomeromycota_unclassified"] <- "Glomeromycota (p)"
pcm2$family[pcm2$family=="Paraglomeraceae"] <- "Paraglomeraceae (f)"
pcm2$family[pcm2$family=="Paraglomerales_unclassified"] <- "Paraglomerales (o)"
# make into factor
pcm2$family <- as.factor(pcm2$family)
levels(pcm2$family)
## [1] "Ambisporaceae (f)" "Archaeosporaceae (f)"
## [3] "Archaeosporales (o)" "Claroideoglomeraceae (f)"
## [5] "Diversisporaceae (f)" "Entrophosporaceae (f)"
## [7] "Glomeraceae (f)" "Glomeromycota (p)"
## [9] "Paraglomeraceae (f)" "Paraglomerales (o)"
## [11] "total"
# Change level family
pcm2$family <- factor(pcm2$family, levels = c("Ambisporaceae (f)", "Archaeosporaceae (f)", "Diversisporaceae (f)", "Glomeraceae (f)", "Claroideoglomeraceae (f)", "Entrophosporaceae (f)", "Paraglomeraceae (f)", "Glomeromycota (p)", "Archaeosporales (o)", "Paraglomerales (o)", "total"))
# make variable into factor
pcm2$variable <- as.factor(pcm2$variable)
# Change genus level order
pcm2$variable <- factor(pcm2$variable, levels = c("Archaeosporales (o)", "Ambisporaceae (f)", "Archaeosporaceae (f)", "Archaeospora (g)", "Diversispora (g)", "Glomeraceae (f)", "Dominikia (g)", "Funneliformis (g)", "Glomus (g)", "Microdominikia (g)", "Rhizophagus (g)", "Claroideoglomeraceae (f)", "Claroideoglomus (g)", "Entrophospora (g)", "Paraglomerales (o)", "Paraglomus (g)", "Glomeromycota (p)", "total"))
levels(pcm2$variable)
## [1] "Archaeosporales (o)" "Ambisporaceae (f)"
## [3] "Archaeosporaceae (f)" "Archaeospora (g)"
## [5] "Diversispora (g)" "Glomeraceae (f)"
## [7] "Dominikia (g)" "Funneliformis (g)"
## [9] "Glomus (g)" "Microdominikia (g)"
## [11] "Rhizophagus (g)" "Claroideoglomeraceae (f)"
## [13] "Claroideoglomus (g)" "Entrophospora (g)"
## [15] "Paraglomerales (o)" "Paraglomus (g)"
## [17] "Glomeromycota (p)" "total"
pcm2$AMF_guild <- NA
pcm2$AMF_guild[pcm2$family=="Ambisporaceae (f)"] <- "ancestral"
pcm2$AMF_guild[pcm2$family=="Archaeosporaceae (f)"] <- "ancestral"
pcm2$AMF_guild[pcm2$family=="Archaeosporales (o)"] <- "unknown"
pcm2$AMF_guild[pcm2$family=="Diversisporaceae (f)"] <- "edaphophilic"
pcm2$AMF_guild[pcm2$family=="Claroideoglomeraceae (f)"] <- "rhizophilic"
pcm2$AMF_guild[pcm2$family=="Entrophosporaceae (f)"] <- "rhizophilic"
pcm2$AMF_guild[pcm2$family=="Glomeraceae (f)"] <- "rhizophilic"
pcm2$AMF_guild[pcm2$family=="Glomeromycota (p)"] <- "unknown"
pcm2$AMF_guild[pcm2$family=="Paraglomeraceae (f)"] <- "rhizophilic"
pcm2$AMF_guild[pcm2$family=="Paraglomerales (o)"] <- "unknown"
pcm2$new_depth <- NA
pcm2$new_depth[pcm2$depth=="0...10"]<-"0-10"
pcm2$new_depth[pcm2$depth=="10...20"]<-"10-20"
pcm2$new_depth[pcm2$depth=="20...30"]<-"20-30"
pcm2$new_depth[pcm2$depth=="30...40"]<-"30-40"
pcm2$new_depth[pcm2$depth=="40..."]<-"40-80"
Currently, AMF are divided in five orders (Archaeosporales, Diversisporales, Glomerales, Entrophosporales and Paraglomerales)
# modifying the column of data frame
pcm2$order <- as.factor(pcm2$order)
levels(pcm2$order)
## [1] "Archaeosporales" "Diversisporales"
## [3] "Entrophosporales" "Glomerales"
## [5] "Glomeromycota_unclassified" "Paraglomerales"
# Change levels
pcm2$order <- factor(pcm2$order, levels = c("Archaeosporales", "Diversisporales", "Entrophosporales", "Glomerales", "Paraglomerales", "Glomeromycota_unclassified"), labels = c("Archaeosporales (o)", "Diversisporales (o)", "Entrophosporales (o)", "Glomerales (o)", "Paraglomerales (o)", "Glomeromycota (p)"))
# add order = Glomeromycota to total
pcm2$order <- pcm2$order %>% replace_na('Glomeromycota (p)')
levels(pcm2$order)
## [1] "Archaeosporales (o)" "Diversisporales (o)" "Entrophosporales (o)"
## [4] "Glomerales (o)" "Paraglomerales (o)" "Glomeromycota (p)"
For a bubble plot, you are using geom_point and scaling the size to your value (relative abundance) column.
I checked that: - thesmallest non-zero is 8.314653e-07 or 0.0000008314653 - and the biggest value is 1.164825e-01 or 0.1164825
So lets set the limits in the figure accordingly
# color for AMF_guild
MyPalette = c("red", "blue", "#ff028d", "black")
# where ("Ancestral" = "red", "Rhizophilic" = "#ff028d", "Edaphophilic" = "blue", "Unknown" = "black")
xx2 = ggplot(pcm2, aes(x = new_depth, y = variable)) +
geom_point(aes(size = value, fill = order, color = AMF_guild), shape = 21, alpha = 1, stroke = 0) +
scale_fill_manual(values=c("#99CC99", "#83adb5", "#692D6B", "#D094D2", "#cba69e", "#A9A9A9")) +
scale_size_continuous(limits = c(0.0000001, 0.2), range = c(3,30), breaks = c(0.0000001, 0.0001, 0.001, 0.01, 0.2)) +
labs( x= "depth (cm)", y = "", size = "Relative abundance", fill = "", color = "AMF guild") +
theme(legend.key=element_blank(),
axis.text.x = element_text(colour = "black", size = 14, angle = 45, vjust = 1, hjust = 1),
axis.title.x=element_text(colour = "black", size = 14,face="bold"),
axis.text.y = element_text(colour = "black", size = 14),
legend.text = element_text(size = 14, colour ="black"),
legend.title = element_text(size = 16, face = "bold"),
panel.background = element_blank(), panel.border = element_rect(colour = "black", fill = NA, size = 1.2)) + facet_wrap(vars(sample_type), nrow = 1, ncol = 4) +
scale_y_discrete(limits = rev(levels(pcm2$variable))) + guides(fill = guide_legend(order = 1, ncol = 2, override.aes = list(size = 8, shape = 21)), color = guide_legend(order = 2, ncol = 2, override.aes = list(shape = 15, size = 8, stroke = 2)))
#+ theme(legend.margin=margin(1,1,1,1), legend.box.spacing = unit(1.6, "pt"))
#+ theme(legend.title.align=0.5)#+ theme(legend.position="bottom")
f1 <- xx2 + scale_color_manual(values = (MyPalette)) + theme(strip.text = element_text(size = 16, color = "black"))
#+ guides(color = guide_legend(nrow = 2, override.aes = list(shape = 15, size = 2, stroke = 2))) #+ coord_flip() + guide_legend(ncol=2)
### change y axis label colors based on AMF guild
f2 <- f1 + theme(axis.text.y = element_text(color = c("black", "black", "#ff028d", "black", "#ff028d", "#ff028d", "#ff028d", "#ff028d", "#ff028d", "#ff028d", "#ff028d", "#ff028d", "#ff028d", "blue", "red", "red", "red", "black")))
print(f2)
Save with 1400 width and 550 height
I have previously tested the AMF genera, family, order and guild, but no other difference was found other than:
More Ambisporaceae in forest compared to other Wilcoxon).
So I will not include the tests here, They were done similarly as before e.g. for AMF relative abundance.
Simple spearman rank correlation with richness and env. variables are done WITHOUT forest because forest soil is so different environment compared to meadow, organic and conventional soils
library("ggpubr")
library('phyloseq')
library("cowplot")
library("dplyr")
library("ggplot2")
library("vegan")
library("microbiome")
library("tibble")
library(stringr)
library(reshape2)
library(tidyr)
setwd('\\\\ad.helsinki.fi\\home\\l\\lehakkin\\Desktop\\PROJECT_YONI_fungal_ITS')
load('ps_FINAL')
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 140 samples ]
## sample_data() Sample Data: [ 140 samples by 35 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
ps_nf <- subset_samples(ps, sample_type!= "forest")
meta <- meta(ps_nf)
names(meta)
## [1] "sampleID" "plot" "sampling_position"
## [4] "actual_sample_depth" "depth" "depth_numerical"
## [7] "vegetation" "sample_type" "root_mgg"
## [10] "pH_H2O" "EC_uScm" "C_g_per_kg"
## [13] "N_gkg" "TP_gkg" "Alox_mmolkg"
## [16] "Feox_mmolkg" "oxides_mmolkg" "PH2O_mgkg"
## [19] "Porg_mgkg" "DOC_mgkg" "Pinorg_mgkg"
## [22] "C_per_N" "observed" "chao1"
## [25] "shannon" "observed_sng" "chao1_sng"
## [28] "shannon_sng" "log_root" "reads"
## [31] "OTUs" "saprotroph_richness" "symbiotroph_richness"
## [34] "pathotroph_richness" "AMF_richness"
Soil properties to test against:
C_per_N pH_H2O C_g_per_kg Feox_mmolkg DOC_mgkg N_gkg depth_numerical Porg_mgkg log_root TP_gkg Pinorg_mgkg Alox_mmolkg PH2O_mgkg
env <- c("C_per_N", "pH_H2O", "C_g_per_kg", "Feox_mmolkg", "DOC_mgkg", "N_gkg", "depth_numerical", "Porg_mgkg", "log_root", "TP_gkg", "Pinorg_mgkg", "Alox_mmolkg", "PH2O_mgkg")
for (i in env) {
# Filter out rows with NA values in the columns of interest
valid_data <- meta[!is.na(meta$observed) & !is.na(meta[[i]]), ]
# Perform Spearman correlation test
x <- cor.test(valid_data$observed, valid_data[[i]], method = "spearman")
# Print the result
print(paste("Correlation test for:", i))
print(x)
}
## [1] "Correlation test for: C_per_N"
##
## Spearman's rank correlation rho
##
## data: valid_data$observed and valid_data[[i]]
## S = 122420, p-value = 7.695e-15
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.6239005
##
## [1] "Correlation test for: pH_H2O"
##
## Spearman's rank correlation rho
##
## data: valid_data$observed and valid_data[[i]]
## S = 543813, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.6707008
##
## [1] "Correlation test for: C_g_per_kg"
##
## Spearman's rank correlation rho
##
## data: valid_data$observed and valid_data[[i]]
## S = 98808, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.6964415
##
## [1] "Correlation test for: Feox_mmolkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$observed and valid_data[[i]]
## S = 145648, p-value = 2.38e-11
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.5525416
##
## [1] "Correlation test for: DOC_mgkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$observed and valid_data[[i]]
## S = 107044, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.6711408
##
## [1] "Correlation test for: N_gkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$observed and valid_data[[i]]
## S = 103654, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.6815556
##
## [1] "Correlation test for: depth_numerical"
##
## Spearman's rank correlation rho
##
## data: valid_data$observed and valid_data[[i]]
## S = 568043, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.7451402
##
## [1] "Correlation test for: Porg_mgkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$observed and valid_data[[i]]
## S = 108419, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.6587925
##
## [1] "Correlation test for: log_root"
##
## Spearman's rank correlation rho
##
## data: valid_data$observed and valid_data[[i]]
## S = 147129, p-value = 3.731e-11
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.5479921
##
## [1] "Correlation test for: TP_gkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$observed and valid_data[[i]]
## S = 134591, p-value = 6.626e-13
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.5865102
##
## [1] "Correlation test for: Pinorg_mgkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$observed and valid_data[[i]]
## S = 361273, p-value = 0.2224
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.1099019
##
## [1] "Correlation test for: Alox_mmolkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$observed and valid_data[[i]]
## S = 130503, p-value = 1.582e-13
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.59907
##
## [1] "Correlation test for: PH2O_mgkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$observed and valid_data[[i]]
## S = 258057, p-value = 0.03667
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.1878602
env <- c("C_per_N", "pH_H2O", "C_g_per_kg", "Feox_mmolkg", "DOC_mgkg", "N_gkg", "depth_numerical", "Porg_mgkg", "log_root", "TP_gkg", "Pinorg_mgkg", "Alox_mmolkg", "PH2O_mgkg")
for (i in env) {
# Filter out rows with NA values in the columns of interest
valid_data <- meta[!is.na(meta$AMF_richness) & !is.na(meta[[i]]), ]
# Perform Spearman correlation test
x <- cor.test(valid_data$AMF_richness, valid_data[[i]], method = "spearman")
# Print the result
print(paste("Correlation test for:", i))
print(x)
}
## [1] "Correlation test for: C_per_N"
##
## Spearman's rank correlation rho
##
## data: valid_data$AMF_richness and valid_data[[i]]
## S = 142617, p-value = 9.284e-12
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.5618529
##
## [1] "Correlation test for: pH_H2O"
##
## Spearman's rank correlation rho
##
## data: valid_data$AMF_richness and valid_data[[i]]
## S = 512461, p-value = 2.496e-12
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.5743796
##
## [1] "Correlation test for: C_g_per_kg"
##
## Spearman's rank correlation rho
##
## data: valid_data$AMF_richness and valid_data[[i]]
## S = 159586, p-value = 1.27e-09
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.5097203
##
## [1] "Correlation test for: Feox_mmolkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$AMF_richness and valid_data[[i]]
## S = 124010, p-value = 1.425e-14
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.6190164
##
## [1] "Correlation test for: DOC_mgkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$AMF_richness and valid_data[[i]]
## S = 157639, p-value = 7.53e-10
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.5157014
##
## [1] "Correlation test for: N_gkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$AMF_richness and valid_data[[i]]
## S = 176552, p-value = 8.089e-08
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.4575968
##
## [1] "Correlation test for: depth_numerical"
##
## Spearman's rank correlation rho
##
## data: valid_data$AMF_richness and valid_data[[i]]
## S = 453384, p-value = 5.841e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.3928852
##
## [1] "Correlation test for: Porg_mgkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$AMF_richness and valid_data[[i]]
## S = 204283, p-value = 4.676e-05
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.3570961
##
## [1] "Correlation test for: log_root"
##
## Spearman's rank correlation rho
##
## data: valid_data$AMF_richness and valid_data[[i]]
## S = 189803, p-value = 1.324e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.4168873
##
## [1] "Correlation test for: TP_gkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$AMF_richness and valid_data[[i]]
## S = 252601, p-value = 0.01205
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.2239608
##
## [1] "Correlation test for: Pinorg_mgkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$AMF_richness and valid_data[[i]]
## S = 384409, p-value = 0.0434
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.1809802
##
## [1] "Correlation test for: Alox_mmolkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$AMF_richness and valid_data[[i]]
## S = 133398, p-value = 4.39e-13
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.5901751
##
## [1] "Correlation test for: PH2O_mgkg"
##
## Spearman's rank correlation rho
##
## data: valid_data$AMF_richness and valid_data[[i]]
## S = 349401, p-value = 0.271
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.09961067
ps_nf_RA <- transform(ps_nf, "compositional")
ps_nf_RA
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 20610 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 35 sample variables ]
## tax_table() Taxonomy Table: [ 20610 taxa by 7 taxonomic ranks ]
gm <- aggregate_rare(ps_nf_RA, level = "phylum", detection = 0, prevalence = 0 )
gm
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 14 taxa and 125 samples ]
## sample_data() Sample Data: [ 125 samples by 35 sample variables ]
## tax_table() Taxonomy Table: [ 14 taxa by 1 taxonomic ranks ]
df <- psmelt(gm)
df <- subset(df, OTU == "Glomeromycota")
env <- c("C_per_N", "pH_H2O", "C_g_per_kg", "Feox_mmolkg", "DOC_mgkg", "N_gkg", "depth_numerical", "Porg_mgkg", "log_root", "TP_gkg", "Pinorg_mgkg", "Alox_mmolkg", "PH2O_mgkg")
for (i in env) {
# Perform Spearman correlation test
x <- cor.test(df$Abundance, df[[i]], method = "spearman", na.rm = TRUE)
# Print the result
print(paste("Correlation test for:", i))
print(x)
}
## [1] "Correlation test for: C_per_N"
##
## Spearman's rank correlation rho
##
## data: df$Abundance and df[[i]]
## S = 338926, p-value = 0.6479
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.04124707
##
## [1] "Correlation test for: pH_H2O"
##
## Spearman's rank correlation rho
##
## data: df$Abundance and df[[i]]
## S = 305311, p-value = 0.492
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.06202587
##
## [1] "Correlation test for: C_g_per_kg"
##
## Spearman's rank correlation rho
##
## data: df$Abundance and df[[i]]
## S = 369419, p-value = 0.1336
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.1349274
##
## [1] "Correlation test for: Feox_mmolkg"
##
## Spearman's rank correlation rho
##
## data: df$Abundance and df[[i]]
## S = 320234, p-value = 0.8579
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.01617915
##
## [1] "Correlation test for: DOC_mgkg"
##
## Spearman's rank correlation rho
##
## data: df$Abundance and df[[i]]
## S = 377055, p-value = 0.0777
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.158386
##
## [1] "Correlation test for: N_gkg"
##
## Spearman's rank correlation rho
##
## data: df$Abundance and df[[i]]
## S = 384219, p-value = 0.0441
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.1803965
##
## [1] "Correlation test for: depth_numerical"
##
## Spearman's rank correlation rho
##
## data: df$Abundance and df[[i]]
## S = 257422, p-value = 0.01924
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.2091478
##
## [1] "Correlation test for: Porg_mgkg"
##
## Spearman's rank correlation rho
##
## data: df$Abundance and df[[i]]
## S = 394534, p-value = 0.006855
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.2416477
##
## [1] "Correlation test for: log_root"
##
## Spearman's rank correlation rho
##
## data: df$Abundance and df[[i]]
## S = 368781, p-value = 0.1393
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.1329676
##
## [1] "Correlation test for: TP_gkg"
##
## Spearman's rank correlation rho
##
## data: df$Abundance and df[[i]]
## S = 430932, p-value = 0.0002288
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.323907
##
## [1] "Correlation test for: Pinorg_mgkg"
##
## Spearman's rank correlation rho
##
## data: df$Abundance and df[[i]]
## S = 348926, p-value = 0.4251
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.07197018
##
## [1] "Correlation test for: Alox_mmolkg"
##
## Spearman's rank correlation rho
##
## data: df$Abundance and df[[i]]
## S = 313002, p-value = 0.6707
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.03839783
##
## [1] "Correlation test for: PH2O_mgkg"
##
## Spearman's rank correlation rho
##
## data: df$Abundance and df[[i]]
## S = 443246, p-value = 5.642e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.3949508